2025-12-01
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Default plugins for rdflib.
|
||||
|
||||
This is a namespace package and contains the default plugins for
|
||||
rdflib.
|
||||
|
||||
"""
|
||||
@@ -0,0 +1,19 @@
|
||||
from rdflib.namespace import RDF # noqa: N999
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
class RDFVOC(RDF):
    # Extension of the ``RDF`` namespace class that additionally declares the
    # terms listed below as ``URIRef`` attributes.
    # NOTE(review): ``_underscore_num`` and ``_fail`` are interpreted by the
    # base namespace machinery, which is not visible here -- presumably they
    # allow ``_1``, ``_2``, ... member names and make unknown attributes an
    # error; confirm against rdflib.namespace before relying on this.
    _underscore_num = True
    _fail = True

    # http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
    # A mapping from unqualified terms to their qualified version.
    RDF: URIRef
    Description: URIRef
    ID: URIRef
    about: URIRef
    parseType: URIRef  # noqa: N815
    resource: URIRef
    li: URIRef
    nodeID: URIRef  # noqa: N815
    datatype: URIRef
|
||||
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
This is a rdflib plugin for parsing Hextuple files, which are Newline-Delimited JSON
|
||||
(ndjson) files, into Conjunctive graphs. The store that backs the graph *must* be able to
|
||||
handle contexts, i.e. multiple graphs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from io import TextIOWrapper
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, List, Optional, TextIO, Union
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from io import BufferedReader
|
||||
|
||||
__all__ = ["HextuplesParser"]
|
||||
|
||||
|
||||
class HextuplesParser(Parser):
    """
    An RDFLib parser for Hextuples.

    Each non-blank input line is a JSON array of six strings:
    ``[subject, predicate, value, datatype, language, graph]``.
    """

    def __init__(self):
        super().__init__()
        # Graph that receives statements whose graph slot is empty; it is
        # (re)assigned on every call to ``parse``.
        self.default_context: Optional[Graph] = None
        # When True, every blank node read from the input is skolemized.
        self.skolemize = False

    @staticmethod
    def _bnode_label(value: str) -> str:
        """Return ``value`` without a single leading ``"_:"`` marker.

        Only the prefix is removed. Removing every ``"_:"`` occurrence (as a
        plain ``str.replace`` does) would make distinct labels such as
        ``_:a_:b`` and ``_:ab`` collapse into the same blank node.
        """
        return value[2:] if value.startswith("_:") else value

    def _parse_hextuple(
        self, ds: Union[Dataset, ConjunctiveGraph], tup: List[Union[str, None]]
    ) -> None:
        """Convert one hextuple into a statement and add it to a graph.

        :param ds: dataset used to look up the named graph given in ``tup[5]``
        :param tup: the six hextuple slots; subject, predicate, value and
            datatype must not be None, language and graph may be None
        :raises ValueError: if one of the four mandatory slots is None
        :raises Exception: if no graph is named and no default context is set
        """
        # all values check
        # subject, predicate, value, datatype cannot be None
        # language and graph may be None
        if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None:
            raise ValueError(
                f"subject, predicate, value, datatype cannot be None. Given: {tup}"
            )

        # 1 - subject
        s: Union[URIRef, BNode]
        if tup[0].startswith("_"):
            s = BNode(value=self._bnode_label(tup[0]))
            if self.skolemize:
                s = s.skolemize()
        else:
            s = URIRef(tup[0])

        # 2 - predicate
        p = URIRef(tup[1])

        # 3 - value; the datatype slot decides which kind of node it becomes
        o: Union[URIRef, BNode, Literal]
        if tup[3] == "globalId":
            o = URIRef(tup[2])
        elif tup[3] == "localId":
            o = BNode(value=self._bnode_label(tup[2]))
            if self.skolemize:
                o = o.skolemize()
        else:  # literal
            if tup[4] is None:
                o = Literal(tup[2], datatype=URIRef(tup[3]))
            else:
                # a language tag takes precedence over the datatype slot
                o = Literal(tup[2], lang=tup[4])

        # 6 - context
        if tup[5] is not None:
            c = (
                BNode(self._bnode_label(tup[5]))
                if tup[5].startswith("_:")
                else URIRef(tup[5])
            )
            if isinstance(c, BNode) and self.skolemize:
                c = c.skolemize()

            ds.get_context(c).add((s, p, o))
        elif self.default_context is not None:
            self.default_context.add((s, p, o))
        else:
            raise Exception("No context to parse into!")

    # type error: Signature of "parse" incompatible with supertype "Parser"
    def parse(  # type: ignore[override]
        self,
        source: InputSource,
        graph: Graph,
        skolemize: bool = False,
        **kwargs: Any,
    ) -> None:
        """Parse newline-delimited Hextuples from ``source`` into ``graph``.

        :param source: input providing a character stream and/or a byte stream
        :param graph: target graph; its store must be context aware
        :param skolemize: if True, skolemize every blank node that is read
        :param kwargs: only ``encoding`` is inspected; any value other than
            None or "utf-8" triggers a warning and is otherwise ignored
        :raises AssertionError: if the store of ``graph`` is not context aware
        :raises ValueError: if ``source`` offers neither kind of stream
        """
        if kwargs.get("encoding") not in [None, "utf-8"]:
            warnings.warn(
                "Hextuples files are always utf-8 encoded, "
                f"I was passed: {kwargs.get('encoding')}, "
                "but I'm still going to use utf-8"
            )

        assert (
            graph.store.context_aware
        ), "Hextuples Parser needs a context-aware store!"

        self.skolemize = skolemize
        # Set default_union to True to mimic ConjunctiveGraph behavior
        ds = Dataset(store=graph.store, default_union=True)
        ds_default = ds.default_context  # the DEFAULT_DATASET_GRAPH_ID
        if isinstance(graph, (Dataset, ConjunctiveGraph)):
            self.default_context = graph.default_context
        elif graph.identifier is not None:
            if graph.identifier == ds_default.identifier:
                self.default_context = graph
            else:
                self.default_context = ds.get_context(graph.identifier)
        else:
            # mypy thinks this is unreachable, but graph.identifier can be None
            self.default_context = ds_default  # type: ignore[unreachable]
        if self.default_context is not ds_default:
            ds.default_context = self.default_context
            ds.remove_graph(ds_default)  # remove the original unused default graph

        try:
            text_stream: Optional[TextIO] = source.getCharacterStream()
        except (AttributeError, LookupError):
            text_stream = None
        try:
            binary_stream: Optional[BinaryIO] = source.getByteStream()
        except (AttributeError, LookupError):
            binary_stream = None

        if text_stream is None and binary_stream is None:
            raise ValueError(
                f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
            )
        if TYPE_CHECKING:
            assert text_stream is not None or binary_stream is not None
        use_stream: Union[TextIO, BinaryIO]
        if _HAS_ORJSON:
            # with orjson the byte stream is preferred, text is the fallback
            if binary_stream is not None:
                use_stream = binary_stream
            else:
                if TYPE_CHECKING:
                    assert isinstance(text_stream, TextIOWrapper)
                use_stream = text_stream
            loads = orjson.loads
        else:
            # with the stdlib json module the text stream is preferred; a
            # byte stream is decoded as utf-8
            if text_stream is not None:
                use_stream = text_stream
            else:
                if TYPE_CHECKING:
                    assert isinstance(binary_stream, BufferedReader)
                use_stream = TextIOWrapper(binary_stream, encoding="utf-8")
            loads = json.loads

        for line in use_stream:  # type: Union[str, bytes]
            if len(line) == 0 or line.isspace():
                # Skipping empty lines because this is what was being done before for the first and last lines, albeit in a rather indirect way.
                # The result is that we accept input that would otherwise be invalid.
                # Possibly we should just let this result in an error.
                continue
            # this complex handling is because the 'value' component is
            # allowed to be "" but not None
            # all other "" values are treated as None
            raw_line: List[str] = loads(line)
            hex_tuple_line = [x if x != "" else None for x in raw_line]
            if raw_line[2] == "":
                hex_tuple_line[2] = ""
            self._parse_hextuple(ds, hex_tuple_line)
|
||||
@@ -0,0 +1,712 @@
|
||||
"""
|
||||
This parser will interpret a JSON-LD document as an RDF Graph. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> from rdflib import Graph, URIRef, Literal
|
||||
>>> test_json = '''
|
||||
... {
|
||||
... "@context": {
|
||||
... "dc": "http://purl.org/dc/terms/",
|
||||
... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
... "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
|
||||
... },
|
||||
... "@id": "http://example.org/about",
|
||||
... "dc:title": {
|
||||
... "@language": "en",
|
||||
... "@value": "Someone's Homepage"
|
||||
... }
|
||||
... }
|
||||
... '''
|
||||
>>> g = Graph().parse(data=test_json, format='json-ld')
|
||||
>>> list(g) == [(URIRef('http://example.org/about'),
|
||||
... URIRef('http://purl.org/dc/terms/title'),
|
||||
... Literal("Someone's Homepage", lang='en'))]
|
||||
True
|
||||
|
||||
"""
|
||||
|
||||
# From: https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/parser.py
|
||||
|
||||
# NOTE: This code reads the entire JSON object into memory before parsing, but
|
||||
# we should consider streaming the input to deal with arbitrarily large graphs.
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
import warnings
|
||||
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
|
||||
|
||||
import rdflib.parser
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.parser import InputSource, URLInputSource
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
|
||||
|
||||
from ..shared.jsonld.context import UNDEF, Context, Term
|
||||
from ..shared.jsonld.keys import (
|
||||
CONTEXT,
|
||||
GRAPH,
|
||||
ID,
|
||||
INCLUDED,
|
||||
INDEX,
|
||||
JSON,
|
||||
LANG,
|
||||
LIST,
|
||||
NEST,
|
||||
NONE,
|
||||
REV,
|
||||
SET,
|
||||
TYPE,
|
||||
VALUE,
|
||||
VOCAB,
|
||||
)
|
||||
from ..shared.jsonld.util import (
|
||||
_HAS_ORJSON,
|
||||
VOCAB_DELIMS,
|
||||
context_from_urlinputsource,
|
||||
json,
|
||||
orjson,
|
||||
source_to_json,
|
||||
)
|
||||
|
||||
__all__ = ["JsonLDParser", "to_rdf"]
|
||||
|
||||
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB) # type: ignore[call-arg]
|
||||
|
||||
ALLOW_LISTS_OF_LISTS = True # NOTE: Not allowed in JSON-LD 1.0
|
||||
|
||||
|
||||
class JsonLDParser(rdflib.parser.Parser):
    """RDFLib parser plugin that reads JSON-LD and feeds it to ``to_rdf``."""

    def __init__(self):
        super().__init__()

    def parse(
        self,
        source: InputSource,
        sink: Graph,
        version: float = 1.1,
        skolemize: bool = False,
        encoding: Optional[str] = "utf-8",
        base: Optional[str] = None,
        context: Optional[
            Union[
                List[Union[Dict[str, Any], str, None]],
                Dict[str, Any],
                str,
            ]
        ] = None,
        generalized_rdf: Optional[bool] = False,
        extract_all_scripts: Optional[bool] = False,
        **kwargs: Any,
    ) -> None:
        """Parse JSON-LD from a source document.

        The source document can be JSON or HTML with embedded JSON script
        elements (type attribute = "application/ld+json"). To process as HTML
        ``source.content_type`` must be set to "text/html" or
        "application/xhtml+xml".

        :param source: InputSource with JSON-formatted data (JSON or HTML)

        :param sink: Graph to receive the parsed triples

        :param version: parse as JSON-LD version, defaults to 1.1; a value
            that cannot be converted to ``float`` also falls back to 1.1

        :param skolemize: passed on to ``to_rdf``; defaults to False

        :param encoding: character encoding of the JSON (should be "utf-8"
            or "utf-16"), defaults to "utf-8"

        :param base: JSON-LD `Base IRI <https://www.w3.org/TR/json-ld/#base-iri>`_, defaults to None

        :param context: JSON-LD `Context <https://www.w3.org/TR/json-ld/#the-context>`_, defaults to None

        :param generalized_rdf: parse as `Generalized RDF <https://www.w3.org/TR/json-ld/#relationship-to-rdf>`_, defaults to False

        :param extract_all_scripts: if source is an HTML document then extract
            all script elements, defaults to False (extract only the first
            script element). This is ignored if ``source.system_id`` contains
            a fragment identifier, in which case only the script element with
            matching id attribute is extracted.

        """
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. "
                "Given encoding was: %s" % encoding
            )

        if not base:
            base = sink.absolutize(source.getPublicId() or source.getSystemId() or "")

        context_data = context
        # Sources exposing both ``url`` and ``links`` may carry a context of
        # their own; it is only consulted when the caller supplied none.
        if not context_data and hasattr(source, "url") and hasattr(source, "links"):
            if TYPE_CHECKING:
                assert isinstance(source, URLInputSource)
            context_data = context_from_urlinputsource(source)

        try:
            version = float(version)
        except (TypeError, ValueError):
            # TypeError covers ``None`` and other non-numeric objects, which
            # previously escaped this fallback and aborted the parse.
            version = 1.1

        # Get the optional fragment identifier
        try:
            fragment_id = URIRef(source.getSystemId()).fragment
        except Exception:
            fragment_id = None

        data, html_base = source_to_json(source, fragment_id, extract_all_scripts)
        if html_base is not None:
            base = URIRef(html_base, base=base)

        # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
        # context_aware. Keeping this check in case RDFLib is changed, or
        # someone passes something context_aware to this parser directly.
        conj_sink: Graph
        if not sink.context_aware:
            conj_sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier)
        else:
            conj_sink = sink

        to_rdf(
            data,
            conj_sink,
            base,
            context_data,
            version,
            bool(generalized_rdf),
            skolemize=skolemize,
        )
|
||||
|
||||
|
||||
def to_rdf(
    data: Any,
    dataset: Graph,
    base: Optional[str] = None,
    context_data: Optional[
        Union[
            List[Union[Dict[str, Any], str, None]],
            Dict[str, Any],
            str,
        ]
    ] = None,
    version: Optional[float] = None,
    generalized_rdf: bool = False,
    allow_lists_of_lists: Optional[bool] = None,
    skolemize: bool = False,
):
    """Add the statements described by decoded JSON-LD ``data`` to ``dataset``.

    :param data: decoded JSON-LD document
    :param dataset: graph handed to ``Parser.parse`` as the target
    :param base: base IRI used to create the initial ``Context``
    :param context_data: extra context loaded into the initial ``Context``
        when truthy
    :param version: JSON-LD version used to create the initial ``Context``
    :param generalized_rdf: forwarded to ``Parser``
    :param allow_lists_of_lists: forwarded to ``Parser``
    :param skolemize: forwarded to ``Parser``
    :return: whatever ``Parser.parse`` returns
    """
    active_context = Context(base=base, version=version)
    if context_data:
        active_context.load(context_data)
    return Parser(
        generalized_rdf=generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
        skolemize=skolemize,
    ).parse(data, active_context, dataset)
|
||||
|
||||
|
||||
class Parser:
    """Turns decoded JSON-LD data (Python dicts and lists) into RDF statements."""

    def __init__(
        self,
        generalized_rdf: bool = False,
        allow_lists_of_lists: Optional[bool] = None,
        skolemize: bool = False,
    ):
        """
        :param generalized_rdf: if True, blank nodes are accepted as
            predicates and identifiers without a ``:`` are not rejected
        :param allow_lists_of_lists: whether a list may appear inside another
            list; ``None`` selects the module default ``ALLOW_LISTS_OF_LISTS``
        :param skolemize: if True, every blank node created here is replaced
            by the result of its ``skolemize()`` method
        """
        self.skolemize = skolemize
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (
            allow_lists_of_lists
            if allow_lists_of_lists is not None
            else ALLOW_LISTS_OF_LISTS
        )
        # Remembers the blank node substituted for each identifier whose
        # resolved URI turned out to be empty, so repeats map to the same node.
        self.invalid_uri_to_bnode: dict[str, BNode] = {}

    def parse(self, data: Any, context: Context, dataset: Graph) -> Graph:
        """Add every node found in ``data`` to ``dataset``.

        :param data: decoded JSON-LD; a list of nodes or a single node dict.
            Anything else is treated as a single node and, like any non-dict
            node, contributes no statements.
        :param context: active context; a top-level ``@context`` found in
            ``data`` is loaded into it
        :param dataset: target graph, also used for namespace bindings
        :return: the graph that received the top-level statements
        """
        topcontext = False
        if isinstance(data, dict):
            local_context = data.get(CONTEXT)
            if local_context:
                context.load(local_context, context.base)
                topcontext = True
        # Wrapping non-list data (including scalars) keeps ``resources``
        # always bound; previously a scalar document hit an unbound local.
        resources: List[Any] = data if isinstance(data, list) else [data]

        if context.vocab:
            dataset.bind(None, context.vocab)
        for name, term in context.terms.items():
            if term.id and term.id.endswith(VOCAB_DELIMS):
                dataset.bind(name, term.id)

        # type error: "Graph" has no attribute "default_context"
        graph = dataset.default_context if dataset.context_aware else dataset  # type: ignore[attr-defined]

        for node in resources:
            self._add_to_graph(dataset, graph, context, node, topcontext)

        return graph

    def _add_to_graph(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        node: Any,
        topcontext: bool = False,
    ) -> Optional[Node]:
        """Add one node object to ``graph`` and return its subject.

        Returns None when ``node`` is not a dict, when it carries a truthy
        value according to ``context.get_value``, or when its identifier
        cannot be turned into an RDF node.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return None

        # A top-level context was already loaded by ``parse``.
        if CONTEXT in node and not topcontext:
            local_context = node[CONTEXT]
            if local_context:
                context = context.subcontext(local_context)
            else:
                context = Context(base=context.doc_base)

        # type error: Incompatible types in assignment (expression has type "Optional[Context]", variable has type "Context")
        context = context.get_context_for_type(node)  # type: ignore[assignment]

        id_val = context.get_id(node)

        if id_val is None:
            nested_id = self._get_nested_id(context, node)
            if nested_id is not None and len(nested_id) > 0:
                id_val = nested_id

        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()
            if self.skolemize:
                subj = subj.skolemize()

        if subj is None:
            return None

        # NOTE: crude way to signify that this node might represent a named graph
        no_id = id_val is None

        for key, obj in node.items():
            if key == CONTEXT or key in context.get_keys(ID):
                continue

            if key == REV or key in context.get_keys(REV):
                for rkey, robj in obj.items():
                    self._key_to_graph(
                        dataset,
                        graph,
                        context,
                        subj,
                        rkey,
                        robj,
                        reverse=True,
                        no_id=no_id,
                    )
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj, no_id=no_id)

        return subj

    def _get_nested_id(self, context: Context, node: Dict[str, Any]) -> Optional[str]:
        """Return the first string identifier found under a nest key of ``node``.

        Only consulted for context version 1.1 or later; returns None when
        nothing is found.
        """
        for key, obj in node.items():
            if context.version >= 1.1 and key in context.get_keys(NEST):
                term = context.terms.get(key)
                if term and term.id is None:
                    continue
                nested_objs = obj if isinstance(obj, list) else [obj]
                for nested in nested_objs:
                    if not isinstance(nested, dict):
                        continue
                    id_val = context.get_id(nested)
                    if not id_val:
                        subcontext = context.get_context_for_term(
                            context.terms.get(key)
                        )
                        id_val = self._get_nested_id(subcontext, nested)
                    if isinstance(id_val, str):
                        return id_val
        return None

    def _key_to_graph(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        subj: Node,
        key: str,
        obj: Any,
        reverse: bool = False,
        no_id: bool = False,
    ) -> None:
        """Add the statements for one ``key``/``obj`` member of a node.

        :param dataset: graph used to look up a named graph for ``@graph``
        :param graph: graph receiving the statements
        :param context: active context
        :param subj: subject of the node that owns ``key``
        :param key: member name as written in the document
        :param obj: member value
        :param reverse: if True, ``subj`` becomes the object of the statement
        :param no_id: True when the owning node had no identifier, in which
            case ``@graph`` content goes to ``graph`` and not a named graph
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.type == JSON:
                obj_nodes = [self._to_typed_json_value(obj)]
            elif LIST in term.container:
                obj_nodes = [self._expand_nested_list(obj_nodes)]
            elif isinstance(obj, dict):
                obj_nodes = self._parse_container(context, term, obj)
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM

        if GRAPH in (key, term_id):
            if dataset.context_aware and not no_id:
                if TYPE_CHECKING:
                    assert isinstance(dataset, ConjunctiveGraph)
                # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Node"; expected "Union[IdentifiedNode, str, None]"
                subgraph = dataset.get_context(subj)  # type: ignore[arg-type]
            else:
                subgraph = graph
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return

        # ``@set`` and ``@included`` members are handled the same way: their
        # nodes are added to the current graph without linking them to subj.
        if SET in (key, term_id) or INCLUDED in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        if context.version >= 1.1 and key in context.get_keys(NEST):
            term = context.terms.get(key)
            if term and term.id is None:
                return
            nested_objs = obj if isinstance(obj, list) else [obj]
            for nested in nested_objs:
                if not isinstance(nested, dict):
                    continue
                for nkey, nobj in nested.items():
                    # NOTE: we've already captured subject
                    if nkey in context.get_keys(ID):
                        continue
                    subcontext = context.get_context_for_type(nested)
                    # type error: Argument 3 to "_key_to_graph" of "Parser" has incompatible type "Optional[Context]"; expected "Context"
                    self._key_to_graph(dataset, graph, subcontext, subj, nkey, nobj)  # type: ignore[arg-type]
            return

        pred_uri = term.id if term else context.expand(key)

        context = context.get_context_for_term(term)

        # Flatten deep nested lists
        def flatten(items: Iterable[Any]) -> List[Any]:
            flattened = []
            for item in items:
                if isinstance(item, dict):
                    members = context.get_set(item)
                    if members is not None:
                        item = members
                if isinstance(item, list):
                    flattened += flatten(item)
                    continue
                flattened.append(item)
            return flattened

        obj_nodes = flatten(obj_nodes)

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        pred: IdentifiedNode
        bid = self._get_bnodeid(pred_uri)
        if bid:
            # blank node predicates are only emitted for generalized RDF
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
            if self.skolemize:
                pred = pred.skolemize()
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            rdf_obj = self._to_object(dataset, graph, context, term, obj_node)
            if rdf_obj is None:
                continue
            if reverse:
                graph.add((rdf_obj, pred, subj))
            else:
                graph.add((subj, pred, rdf_obj))

    def _parse_container(
        self, context: Context, term: Term, obj: Dict[str, Any]
    ) -> List[Any]:
        """Expand a container-mapped dict into a list of object nodes.

        The shape of the result depends on the keywords found in
        ``term.container``; a dict matching no container kind is returned
        unchanged as the only element.
        """
        if LANG in term.container:
            obj_nodes = []
            for lang, values in obj.items():
                if not isinstance(values, list):
                    values = [values]
                if lang in context.get_keys(NONE):
                    obj_nodes += values
                else:
                    # (value, language) pairs are recognised by ``_to_object``
                    for v in values:
                        obj_nodes.append((v, lang))
            return obj_nodes

        v11 = context.version >= 1.1

        if v11 and GRAPH in term.container and ID in term.container:
            return [
                (
                    {GRAPH: o}
                    if k in context.get_keys(NONE)
                    else {ID: k, GRAPH: o} if isinstance(o, dict) else o
                )
                for k, o in obj.items()
            ]

        elif v11 and GRAPH in term.container and INDEX in term.container:
            return [{GRAPH: o} for o in obj.values()]

        elif v11 and GRAPH in term.container:
            return [{GRAPH: obj}]

        elif v11 and ID in term.container:
            return [
                (
                    {ID: k, **o}
                    if isinstance(o, dict) and k not in context.get_keys(NONE)
                    else o
                )
                for k, o in obj.items()
            ]

        elif v11 and TYPE in term.container:
            return [
                (
                    self._add_type(
                        context,
                        (
                            {ID: context.expand(o) if term.type == VOCAB else o}
                            if isinstance(o, str)
                            else o
                        ),
                        k,
                    )
                    if isinstance(o, (dict, str)) and k not in context.get_keys(NONE)
                    else o
                )
                for k, o in obj.items()
            ]

        elif INDEX in term.container:
            obj_nodes = []
            for key, nodes in obj.items():
                if not isinstance(nodes, list):
                    nodes = [nodes]
                for node in nodes:
                    if v11 and term.index and key not in context.get_keys(NONE):
                        if not isinstance(node, dict):
                            node = {ID: node}
                        values = node.get(term.index, [])
                        if not isinstance(values, list):
                            values = [values]
                        values.append(key)
                        node[term.index] = values
                    obj_nodes.append(node)
            return obj_nodes

        return [obj]

    @staticmethod
    def _add_type(context: Context, o: Dict[str, Any], k: str) -> Dict[str, Any]:
        """Append ``k`` to the types of ``o`` (modified in place) and return ``o``."""
        otype = context.get_type(o) or []
        if otype and not isinstance(otype, list):
            otype = [otype]
        otype.append(k)
        o[TYPE] = otype
        return o

    def _to_object(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        term: Optional[Term],
        node: Any,
        inlist: bool = False,
    ) -> Optional[Node]:
        """Convert one object position value into an RDF node.

        :param node: a ``(value, language)`` tuple, a dict, or a plain value
        :param inlist: True when called for an element of a list; a nested
            list is then dropped unless lists of lists are allowed
        :return: the node to use as object, or None when the value yields none
        """
        if isinstance(node, tuple):
            value, lang = node
            if value is None:
                return None
            if lang and " " in lang:
                return None
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return None
                listref = self._add_list(dataset, graph, context, term, node_list)
                if listref:
                    return listref

        else:  # expand compacted value
            if term and term.type:
                if term.type == JSON:
                    node = self._to_typed_json_value(node)
                elif node is None:
                    return None
                elif term.type == ID and isinstance(node, str):
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB and isinstance(node, str):
                    node = {ID: context.expand(node) or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type, VALUE: node}
            else:
                if node is None:
                    return None
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)

                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)

        lang = context.get_language(node)
        # a language tag rules out a datatype
        datatype = None if lang else (context.get_type(node) or None)
        value = context.get_value(node)
        # type error: Unsupported operand types for in ("Optional[Any]" and "Generator[str, None, None]")
        if datatype in context.get_keys(JSON):  # type: ignore[operator]
            node = self._to_typed_json_value(value)
            datatype = context.get_type(node)
            value = context.get_value(node)

        if lang or context.get_key(VALUE) in node or VALUE in node:
            if value is None:
                return None
            if lang:
                if " " in lang:
                    return None
                return Literal(value, lang=lang)
            elif datatype:
                return Literal(value, datatype=context.expand(datatype))
            else:
                return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context: Context, id_val: str) -> Optional[IdentifiedNode]:
        """Turn an identifier string into a blank node or URI reference.

        Returns None for an identifier whose resolved form has no ``:``
        unless generalized RDF is enabled.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            b = BNode(bid)
            if self.skolemize:
                return b.skolemize()
            return b
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ":" not in uri:
                return None
            node: IdentifiedNode = URIRef(uri)
            if not str(node):
                # An empty URI is replaced by a blank node; the same
                # identifier always maps to the same replacement.
                if id_val not in self.invalid_uri_to_bnode:
                    self.invalid_uri_to_bnode[id_val] = BNode(secrets.token_urlsafe(20))
                node = self.invalid_uri_to_bnode[id_val]
            return node

    def _get_bnodeid(self, ref: str) -> Optional[str]:
        """Return the label of a ``_:label`` reference, else None.

        None is also returned when the label after ``_:`` is empty.
        """
        if not ref.startswith("_:"):
            return None
        bid = ref.split("_:", 1)[-1]
        return bid or None

    def _add_list(
        self,
        dataset: Graph,
        graph: Graph,
        context: Context,
        term: Optional[Term],
        node_list: Any,
    ) -> IdentifiedNode:
        """Add ``node_list`` to ``graph`` as an RDF collection.

        Elements that are None, or that do not convert to an object, are
        left out of the collection.

        :return: the head of the collection, or ``RDF.nil`` when no element
            produced an object
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        first_subj: Union[URIRef, BNode] = BNode()
        if self.skolemize and isinstance(first_subj, BNode):
            first_subj = first_subj.skolemize()

        rest: Union[URIRef, BNode, None]
        subj, rest = first_subj, None

        for node in node_list:
            if node is None:
                continue

            # Convert the element BEFORE advancing to the next cell. Linking
            # first and then skipping an element that yields no object left
            # ``subj`` equal to ``rest``, so the following element produced a
            # cell whose rdf:rest pointed at itself (or, for a trailing
            # element, a cell without rdf:first).
            obj = self._to_object(dataset, graph, context, term, node, inlist=True)

            if obj is None:
                continue

            if rest:
                graph.add((subj, RDF.rest, rest))
                subj = rest

            graph.add((subj, RDF.first, obj))
            # cell for the next element; only linked in if one follows
            rest = BNode()
            if self.skolemize and isinstance(rest, BNode):
                rest = rest.skolemize()

        if rest:
            graph.add((subj, RDF.rest, RDF.nil))
            return first_subj
        else:
            return RDF.nil

    @staticmethod
    def _to_typed_json_value(value: Any) -> Dict[str, str]:
        """Serialize ``value`` to a key-sorted JSON string typed as ``RDF.JSON``."""
        if _HAS_ORJSON:
            val_string: str = orjson.dumps(
                value,
                option=orjson.OPT_SORT_KEYS | orjson.OPT_NON_STR_KEYS,
            ).decode("utf-8")
        else:
            val_string = json.dumps(
                value, separators=(",", ":"), sort_keys=True, ensure_ascii=False
            )
        return {
            TYPE: RDF.JSON,
            VALUE: val_string,
        }

    @classmethod
    def _expand_nested_list(cls, obj_nodes: List[Any]) -> Dict[str, List[Any]]:
        """Wrap ``obj_nodes`` and, recursively, every list inside it in a list object."""
        result = [
            cls._expand_nested_list(o) if isinstance(o, list) else o for o in obj_nodes
        ]
        return {LIST: result}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
This is a rdflib plugin for parsing NQuad files into Conjunctive
|
||||
graphs that can be used and queried. The store that backs the graph
|
||||
*must* be able to handle contexts.
|
||||
|
||||
>>> from rdflib import ConjunctiveGraph, URIRef, Namespace
|
||||
>>> g = ConjunctiveGraph()
|
||||
>>> data = open("test/data/nquads.rdflib/example.nquads", "rb")
|
||||
>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS
|
||||
<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
|
||||
>>> assert len(g.store) == 449
|
||||
>>> # There should be 16 separate contexts
|
||||
>>> assert len([x for x in g.store.contexts()]) == 16
|
||||
>>> # is the name of entity E10009 "Arco Publications"?
|
||||
>>> # (in graph http://bibliographica.org/entity/E10009)
|
||||
>>> # Looking for:
|
||||
>>> # <http://bibliographica.org/entity/E10009>
|
||||
>>> # <http://xmlns.com/foaf/0.1/name>
|
||||
>>> # "Arco Publications"
|
||||
>>> # <http://bibliographica.org/entity/E10009>
|
||||
>>> s = URIRef("http://bibliographica.org/entity/E10009")
|
||||
>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
|
||||
>>> assert(g.value(s, FOAF.name).eq("Arco Publications"))
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from codecs import getreader
|
||||
from typing import Any, MutableMapping, Optional
|
||||
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.parser import InputSource
|
||||
|
||||
# Build up from the NTriples parser:
|
||||
from rdflib.plugins.parsers.ntriples import W3CNTriplesParser, r_tail, r_wspace
|
||||
from rdflib.term import BNode
|
||||
|
||||
__all__ = ["NQuadsParser"]
|
||||
|
||||
_BNodeContextType = MutableMapping[str, BNode]
|
||||
|
||||
|
||||
class NQuadsParser(W3CNTriplesParser):
    """N-Quads parser: N-Triples statements with an optional graph label."""

    # type error: Signature of "parse" incompatible with supertype "W3CNTriplesParser"
    def parse(  # type: ignore[override]
        self,
        inputsource: InputSource,
        sink: Graph,
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
        **kwargs: Any,
    ):
        """
        Read N-Quads statements from ``inputsource`` into the store behind ``sink``.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: the source of N-Quads-formatted data
        :type sink: `rdflib.graph.Graph`
        :param sink: where to send parsed triples
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
                              See `.W3CNTriplesParser.parse`
        :param skolemize: stored on the parser as ``self.skolemize``
        :return: the `Dataset` wrapped around ``sink.store`` that received the quads
        """
        assert (
            sink.store.context_aware
        ), "NQuadsParser must be given a context-aware store."

        # default_union=True mimics ConjunctiveGraph behaviour.
        dataset = Dataset(store=sink.store, default_union=True)
        placeholder_default = dataset.default_context  # the DEFAULT_DATASET_GRAPH_ID

        # Decide which graph receives statements that carry no graph label.
        if isinstance(sink, (Dataset, ConjunctiveGraph)):
            chosen_default = sink.default_context
        elif sink.identifier is None:
            chosen_default = None
        elif sink.identifier == placeholder_default.identifier:
            chosen_default = sink
        else:
            chosen_default = dataset.get_context(sink.identifier)

        if chosen_default is not None:
            dataset.default_context = chosen_default
            # The dataset's own default graph is unused from here on.
            dataset.remove_graph(placeholder_default)

        # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
        self.sink: Dataset = dataset  # type: ignore[assignment]
        self.skolemize = skolemize

        stream = inputsource.getCharacterStream()
        if not stream:
            # No character stream: decode the byte stream as UTF-8.
            stream = getreader("utf-8")(inputsource.getByteStream())

        if not hasattr(stream, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = stream
        self.buffer = ""
        for raw_line in iter(self.readline, None):
            self.line = raw_line
            try:
                self.parseline(bnode_context)
            except ParseError as err:
                raise ParseError("Invalid line (%s):\n%r" % (err, raw_line))
        # readline() returned None (end of input); record that on the parser.
        self.line = None

        return self.sink

    def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Parse the statement held in ``self.line`` and add it to ``self.sink``.

        Statements with a fourth term (IRI or blank node) go to that named
        graph; statements without one go to the default context.

        :raises ParseError: if anything is left on the line after the
            terminating ``.`` (and optional comment).
        """
        self.eat(r_wspace)
        if not self.line or self.line.startswith("#"):
            return  # The line is empty or a comment

        subj = self.subject(bnode_context)
        self.eat(r_wspace)

        pred = self.predicate()
        self.eat(r_wspace)

        obj = self.object(bnode_context)
        self.eat(r_wspace)

        graph_label = self.uriref() or self.nodeid(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")

        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        if graph_label:
            target = self.sink.get_context(graph_label)
        else:
            target = self.sink.default_context
        target.add((subj, pred, obj))
|
||||
@@ -0,0 +1,385 @@
|
||||
"""\
|
||||
N-Triples Parser
|
||||
License: GPL 2, W3C, BSD, or MIT
|
||||
Author: Sean B. Palmer, inamidst.com
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO, TextIOBase
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Match,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Pattern,
|
||||
TextIO,
|
||||
Union,
|
||||
)
|
||||
|
||||
from rdflib.compat import _string_escape_map, decodeUnicodeEscape
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.term import BNode as bNode
|
||||
from rdflib.term import Literal, URIRef
|
||||
from rdflib.term import URIRef as URI # noqa: N814
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te
|
||||
|
||||
from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType
|
||||
|
||||
__all__ = [
|
||||
"unquote",
|
||||
"uriquote",
|
||||
"W3CNTriplesParser",
|
||||
"NTGraphSink",
|
||||
"NTParser",
|
||||
"DummySink",
|
||||
]
|
||||
|
||||
# Pattern sources for the individual N-Triples terms.
# IRI in angle brackets; group 1 is the text between "<" and ">".
uriref = r'<([^:]+:[^\s"<>]*)>'
# Double-quoted string that may contain backslash escapes; group 1 is the body.
literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
# Optional literal suffix: either "@lang" or "^^<datatype IRI>", never both.
litinfo = r"(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^" + uriref + r")?"

# One input line terminated by CRLF, CR or LF; group 1 excludes the terminator.
r_line = re.compile(r"([^\r\n]*)(?:\r\n|\r|\n)")
# Zero or more spaces/tabs (always matches).
r_wspace = re.compile(r"[ \t]*")
# One or more spaces/tabs (mandatory separator).
r_wspaces = re.compile(r"[ \t]+")
# Statement terminator "." with optional surrounding blanks and trailing comment.
r_tail = re.compile(r"[ \t]*\.[ \t]*(#.*)?")
r_uriref = re.compile(uriref)
# Blank node "_:label"; group 1 is the label without the "_:" prefix.
r_nodeid = re.compile(r"_:([A-Za-z0-9_:]([-A-Za-z0-9_:\.]*[-A-Za-z0-9_:])?)")
# Groups: (string body, language tag, datatype IRI).
r_literal = re.compile(literal + litinfo)

# Number of characters requested per read() in W3CNTriplesParser.readline.
bufsiz = 2048
# When true, unquote()/uriquote() take their strict, validating code paths.
validate = False
|
||||
|
||||
|
||||
class DummySink:
    """Fallback sink: counts the triples it is given and prints each one."""

    def __init__(self):
        # Number of triples received so far.
        self.length = 0

    def triple(self, s, p, o):
        """Record one more triple and echo it to standard output."""
        self.length = self.length + 1
        print(s, p, o)
|
||||
|
||||
|
||||
# Run of printable ASCII characters other than the double quote and backslash.
r_safe = re.compile(r"([\x20\x21\x23-\x5B\x5D-\x7E]+)")
# Two-character backslash escape; group 1 is the character after the backslash.
r_quot = re.compile(r"""\\([tbnrf"'\\])""")
# Unicode escape: \uXXXX (group 1) or \UXXXXXXXX (group 2).
r_uniquot = re.compile(r"\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8})")
|
||||
|
||||
|
||||
def unquote(s: str) -> str:
    """Decode the escape sequences of an N-Triples string.

    With the module-level ``validate`` flag off, decoding is delegated to
    ``decodeUnicodeEscape`` (or ``bytes.decode("unicode-escape")`` for a
    bytes argument).  With it on, the string is scanned strictly and a
    ``ParseError`` is raised for any illegal escape or character.
    """
    if not validate:
        if isinstance(s, str):  # nquads
            return decodeUnicodeEscape(s)
        return s.decode("unicode-escape")  # type: ignore[unreachable]

    pieces = []
    pos = 0
    total = len(s)
    while pos < total:
        # 1) a run of characters that need no decoding
        plain = r_safe.match(s, pos)
        if plain:
            pieces.append(plain.group(1))
            pos = plain.end()
            continue

        # 2) a two-character escape such as \n or \"
        simple = r_quot.match(s, pos)
        if simple:
            pieces.append(_string_escape_map[simple.group(1)])
            pos += 2
            continue

        # 3) a \uXXXX / \UXXXXXXXX escape
        uni = r_uniquot.match(s, pos)
        if not uni:
            if s.startswith("\\", pos):
                raise ParseError("Illegal escape at: %s..." % s[pos : pos + 10])
            raise ParseError("Illegal literal character: %r" % s[pos])

        short_hex, long_hex = uni.groups()
        codepoint = int(short_hex or long_hex, 16)
        if codepoint > 0x10FFFF:
            raise ParseError("Disallowed codepoint: %08X" % codepoint)
        pieces.append(chr(codepoint))
        pos = uni.end()

    return "".join(pieces)
|
||||
|
||||
|
||||
# A single character in the range U+0080..U+00FF.
r_hibyte = re.compile(r"([\x80-\xFF])")


def uriquote(uri: str) -> str:
    """Percent-encode U+0080..U+00FF characters of ``uri`` when validating.

    With the module-level ``validate`` flag off the IRI is returned untouched.
    """
    if validate:

        def _percent(match):
            return "%%%02X" % ord(match.group(1))

        return r_hibyte.sub(_percent, uri)
    return uri
|
||||
|
||||
|
||||
# Maps a blank-node label read from the input (the part after "_:") to the
# BNode created for it; see W3CNTriplesParser.nodeid.
_BNodeContextType = MutableMapping[str, bNode]
|
||||
|
||||
|
||||
class W3CNTriplesParser:
    """An N-Triples Parser.

    This is a legacy-style Triples parser for NTriples provided by W3C
    Usage::

          p = W3CNTriplesParser(sink=MySink())
          sink = p.parse(f) # file; use parsestring for a string

    To define a context in which blank node identifiers refer to the same blank node
    across instances of NTriplesParser, pass the same dict as ``bnode_context`` to each
    instance. By default, a new blank node context is created for each instance of
    `W3CNTriplesParser`.
    """

    __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line", "skolemize")

    def __init__(
        self,
        sink: Optional[Union[DummySink, NTGraphSink]] = None,
        bnode_context: Optional[_BNodeContextType] = None,
    ):
        """
        :param sink: object whose ``triple(s, p, o)`` method receives every
            parsed statement; a `DummySink` is used when omitted.
        :param bnode_context: mapping used to remember blank-node labels; a
            fresh dict is created when omitted.
        """
        self.skolemize = False

        if bnode_context is not None:
            self._bnode_ids = bnode_context
        else:
            self._bnode_ids = {}

        self.sink: Union[DummySink, NTGraphSink]
        if sink is not None:
            self.sink = sink
        else:
            self.sink = DummySink()

        # Unconsumed input, the stream being read, and the line being parsed.
        self.buffer: Optional[str] = None
        self.file: Optional[Union[TextIO, codecs.StreamReader]] = None
        self.line: Optional[str] = ""

    def parse(
        self,
        f: Union[TextIO, IO[bytes], codecs.StreamReader],
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
    ) -> Union[DummySink, NTGraphSink]:
        """
        Parse f as an N-Triples file.

        :type f: :term:`file object`
        :param f: the N-Triples source
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
                              to `~rdflib.term.BNode` instances. An empty dict can be
                              passed in to define a distinct context for a given call to
                              `parse`.
        :param skolemize: when true, blank nodes are replaced by the result of
                          ``BNode.skolemize()`` (see `nodeid`).
        :return: the sink that received the triples
        :raises ParseError: if ``f`` has no ``read`` attribute or a line is invalid
        """

        if not hasattr(f, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"):
            # someone still using a bytestream here?
            f = codecs.getreader("utf-8")(f)

        self.skolemize = skolemize
        self.file = f  # type: ignore[assignment]
        self.buffer = ""
        while True:
            # Keep the untouched line: parseline() consumes self.line as it
            # goes, so by the time an error surfaces only the tail is left.
            self.line = line = self.readline()
            if line is None:
                break
            try:
                self.parseline(bnode_context=bnode_context)
            except ParseError as err:
                raise ParseError("Invalid line: {}".format(line)) from err
        return self.sink

    def parsestring(self, s: Union[bytes, bytearray, str], **kwargs) -> None:
        """Parse s as an N-Triples string.

        Bytes input is decoded as UTF-8.  Extra keyword arguments are passed
        on to `parse`.
        """
        if not isinstance(s, (str, bytes, bytearray)):
            raise ParseError("Item to parse must be a string instance.")
        f: Union[codecs.StreamReader, StringIO]
        if isinstance(s, (bytes, bytearray)):
            f = codecs.getreader("utf-8")(BytesIO(s))
        else:
            f = StringIO(s)
        self.parse(f, **kwargs)

    def readline(self) -> Optional[str]:
        """Read an N-Triples line from buffered input.

        :return: the next line without its terminator, or ``None`` at end of input
        """
        # N-Triples lines end in either CRLF, CR, or LF
        # Therefore, we can't just use f.readline()
        if not self.buffer:
            # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read"
            buffer = self.file.read(bufsiz)  # type: ignore[union-attr]
            if not buffer:
                return None
            self.buffer = buffer

        while True:
            m = r_line.match(self.buffer)
            if m:  # the more likely prospect
                self.buffer = self.buffer[m.end() :]
                return m.group(1)
            else:
                # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read"
                buffer = self.file.read(bufsiz)  # type: ignore[union-attr]
                if not buffer and not self.buffer.isspace():
                    # Last line does not need to be terminated with a newline
                    buffer += "\n"
                elif not buffer:
                    return None
                self.buffer += buffer

    def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Parse the statement in ``self.line`` and hand it to ``self.sink``.

        :raises ParseError: on a malformed term or on text after the final ``.``
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        subject = self.subject(bnode_context)
        self.eat(r_wspaces)

        predicate = self.predicate()
        self.eat(r_wspaces)

        object_ = self.object(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage: {}".format(self.line))
        self.sink.triple(subject, predicate, object_)

    def peek(self, token: str) -> bool:
        """Return whether the unparsed rest of the line starts with ``token``."""
        return self.line.startswith(token)  # type: ignore[union-attr]

    def eat(self, pattern: Pattern[str]) -> Match[str]:
        """Match ``pattern`` at the start of ``self.line`` and consume the match.

        :raises ParseError: if the pattern does not match
        """
        m = pattern.match(self.line)  # type: ignore[arg-type]
        if not m:  # @@ Why can't we get the original pattern?
            # print(dir(pattern))
            # print repr(self.line), type(self.line)
            raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
        self.line = self.line[m.end() :]  # type: ignore[index]
        return m

    def subject(self, bnode_context=None) -> Union[bNode, URIRef]:
        """Parse a subject term (IRI or blank node)."""
        # @@ Consider using dictionary cases
        subj = self.uriref() or self.nodeid(bnode_context)
        if not subj:
            raise ParseError("Subject must be uriref or nodeID")
        return subj

    def predicate(self) -> Union[bNode, URIRef]:
        """Parse a predicate term (IRI only)."""
        pred = self.uriref()
        if not pred:
            raise ParseError("Predicate must be uriref")
        return pred

    def object(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[URI, bNode, Literal]:
        """Parse an object term (IRI, blank node or literal)."""
        objt = self.uriref() or self.nodeid(bnode_context) or self.literal()
        # Compare by identity: the term helpers return False for "not this
        # kind of term", and a parsed term must not be mistaken for that.
        if objt is False:
            raise ParseError("Unrecognised object type")
        return objt

    def uriref(self) -> Union[te.Literal[False], URI]:
        """Parse an ``<IRI>`` if one starts here, else return ``False``."""
        if self.peek("<"):
            uri = self.eat(r_uriref).group(1)
            uri = unquote(uri)
            uri = uriquote(uri)
            return URI(uri)
        return False

    def nodeid(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[te.Literal[False], bNode, URI]:
        """Parse a ``_:label`` blank node if one starts here, else return ``False``.

        With ``self.skolemize`` set, the skolemized form of the labelled node
        is returned.  Otherwise the label is mapped through ``bnode_context``
        (or the parser's own mapping when none is given).
        """
        if self.peek("_"):
            if self.skolemize:
                bnode_id = self.eat(r_nodeid).group(1)
                return bNode(bnode_id).skolemize()

            else:
                # Fix for https://github.com/RDFLib/rdflib/issues/204
                if bnode_context is None:
                    bnode_context = self._bnode_ids
                bnode_id = self.eat(r_nodeid).group(1)
                new_id = bnode_context.get(bnode_id, None)
                if new_id is not None:
                    # Re-map to id specific to this doc
                    return bNode(new_id)
                else:
                    # Replace with freshly-generated document-specific BNode id
                    bnode = bNode()
                    # Store the mapping
                    bnode_context[bnode_id] = bnode
                    return bnode
        return False

    def literal(self) -> Union[te.Literal[False], Literal]:
        """Parse a quoted literal if one starts here, else return ``False``."""
        if self.peek('"'):
            lit, lang, dtype = self.eat(r_literal).groups()
            # r_literal makes "@lang" and "^^<datatype>" alternatives of one
            # optional group, so at most one of the two is ever set.
            lang = lang or None
            if dtype:
                dtype = URI(uriquote(unquote(dtype)))
            else:
                dtype = None
            return Literal(unquote(lit), lang, dtype)
        return False
|
||||
|
||||
|
||||
class NTGraphSink:
    """Sink adapter that forwards every parsed triple to a graph's ``add``."""

    __slots__ = ("g",)

    def __init__(self, graph: Graph):
        # The graph that receives the triples.
        self.g = graph

    def triple(self, s: _SubjectType, p: _PredicateType, o: _ObjectType) -> None:
        """Add the statement ``(s, p, o)`` to the wrapped graph."""
        statement = (s, p, o)
        self.g.add(statement)
|
||||
|
||||
|
||||
class NTParser(Parser):
    """parser for the ntriples format, often stored with the .nt extension

    See http://www.w3.org/TR/rdf-testcases/#ntriples"""

    __slots__ = ()

    @classmethod
    def parse(cls, source: InputSource, sink: Graph, **kwargs: Any) -> None:
        """
        Parse the NT format

        :type source: `rdflib.parser.InputSource`
        :param source: the source of NT-formatted data
        :type sink: `rdflib.graph.Graph`
        :param sink: where to send parsed triples
        :param kwargs: Additional arguments to pass to `.W3CNTriplesParser.parse`
        """
        f: Union[TextIO, IO[bytes], codecs.StreamReader]
        f = source.getCharacterStream()
        if not f:
            b = source.getByteStream()
            # TextIOBase includes: StringIO and TextIOWrapper
            if isinstance(b, TextIOBase):
                # f is not really a ByteStream, but a CharacterStream
                f = b  # type: ignore[assignment]
            else:
                # since N-Triples 1.1 files can and should be utf-8 encoded
                f = codecs.getreader("utf-8")(b)
        parser = W3CNTriplesParser(NTGraphSink(sink))
        try:
            parser.parse(f, **kwargs)
        finally:
            # Close the stream on a parse error too, not only on success.
            f.close()
|
||||
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from codecs import getreader
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union
|
||||
|
||||
from rdflib.exceptions import ParserError as ParseError
|
||||
from rdflib.graph import Dataset
|
||||
from rdflib.parser import InputSource
|
||||
from rdflib.plugins.parsers.nquads import NQuadsParser
|
||||
|
||||
# Build up from the NTriples parser:
|
||||
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
|
||||
from rdflib.term import BNode, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te
|
||||
|
||||
__all__ = ["RDFPatchParser", "Operation"]
|
||||
|
||||
# Type of the ``bnode_context`` argument: a mutable mapping from a blank-node
# label (str) to a BNode.
_BNodeContextType = MutableMapping[str, BNode]
|
||||
|
||||
|
||||
class Operation(Enum):
    """
    Enum of RDF Patch operations.

    Each member's value is the operation code that RDFPatchParser.operation
    looks for at the start of a patch line.

    Operations:
    - `AddTripleOrQuad` (A): Adds a triple or quad.
    - `DeleteTripleOrQuad` (D): Deletes a triple or quad.
    - `AddPrefix` (PA): Adds a prefix.
    - `DeletePrefix` (PD): Deletes a prefix.
    - `TransactionStart` (TX): Starts a transaction.
    - `TransactionCommit` (TC): Commits a transaction.
    - `TransactionAbort` (TA): Aborts a transaction.
    - `Header` (H): Specifies a header.
    """

    AddTripleOrQuad = "A"
    DeleteTripleOrQuad = "D"
    AddPrefix = "PA"
    DeletePrefix = "PD"
    TransactionStart = "TX"
    TransactionCommit = "TC"
    TransactionAbort = "TA"
    Header = "H"
|
||||
|
||||
|
||||
class RDFPatchParser(NQuadsParser):
    """Parser for RDF Patch documents: one operation code plus arguments per line."""

    def parse(  # type: ignore[override]
        self,
        inputsource: InputSource,
        sink: Dataset,
        bnode_context: Optional[_BNodeContextType] = None,
        skolemize: bool = False,
        **kwargs: Any,
    ) -> Dataset:
        """
        Parse inputsource as an RDF Patch file.

        :type inputsource: `rdflib.parser.InputSource`
        :param inputsource: the source of RDF Patch formatted data
        :type sink: `rdflib.graph.Dataset`
        :param sink: where to send parsed data
        :type bnode_context: `dict`, optional
        :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
                              See `.W3CNTriplesParser.parse`
        :return: the `Dataset` built over ``sink.store`` that the patch was applied to
        :raises ParseError: if the source is not file-like or a line cannot be parsed
        """
        assert sink.store.context_aware, (
            "RDFPatchParser must be given" " a context aware store."
        )
        # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
        self.sink: Dataset = Dataset(store=sink.store)
        self.skolemize = skolemize

        source = inputsource.getCharacterStream()
        if not source:
            source = inputsource.getByteStream()
            source = getreader("utf-8")(source)

        if not hasattr(source, "read"):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = source
        self.buffer = ""
        while True:
            # Keep the whole line for the error message; parsing consumes self.line.
            self.line = __line = self.readline()
            if self.line is None:
                break
            try:
                self.parsepatch(bnode_context)
            except ParseError as msg:
                raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
        return self.sink

    def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
        """Apply the single patch line held in ``self.line``.

        Add/delete and prefix operations are executed; every other recognised
        operation (transactions, header) is read and ignored.
        """
        self.eat(r_wspace)
        # From spec: "No comments should be included (comments start # and run to end
        # of line)."
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        # if header, transaction, skip
        operation = self.operation()
        self.eat(r_wspace)

        if operation in [Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad]:
            self.add_or_remove_triple_or_quad(operation, bnode_context)
        elif operation == Operation.AddPrefix:
            self.add_prefix()
        elif operation == Operation.DeletePrefix:
            self.delete_prefix()

    def add_or_remove_triple_or_quad(
        self, operation, bnode_context: Optional[_BNodeContextType] = None
    ) -> None:
        """Parse a triple or quad from ``self.line`` and add or remove it.

        :param operation: `Operation.AddTripleOrQuad` or `Operation.DeleteTripleOrQuad`
        :raises ParseError: if text remains after the terminating ``.``
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith("#"):
            return  # The line is empty or a comment

        subject = self.labeled_bnode() or self.subject(bnode_context)
        self.eat(r_wspace)

        predicate = self.predicate()
        self.eat(r_wspace)

        obj = self.labeled_bnode() or self.object(bnode_context)
        self.eat(r_wspace)

        context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        if operation == Operation.AddTripleOrQuad:
            if context:
                self.sink.get_context(context).add((subject, predicate, obj))
            else:
                self.sink.default_context.add((subject, predicate, obj))
        elif operation == Operation.DeleteTripleOrQuad:
            if context:
                self.sink.get_context(context).remove((subject, predicate, obj))
            else:
                self.sink.default_context.remove((subject, predicate, obj))

    def add_prefix(self):
        """Bind the prefix/namespace pair found on the rest of the line."""
        # Extract prefix and URI from the line
        prefix, ns, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        ns_stripped = ns.strip("<>")
        self.sink.bind(prefix, ns_stripped)

    def delete_prefix(self):
        """Rebind the prefix found on the rest of the line to ``None``."""
        prefix, _, _ = self.line.replace('"', "").replace("'", "").split(" ")  # type: ignore[union-attr]
        self.sink.namespace_manager.bind(prefix, None, replace=True)

    def operation(self) -> Operation:
        """Consume and return the operation code at the start of ``self.line``.

        :raises ValueError: if the line starts with no known operation code
        """
        for op in Operation:
            if self.line.startswith(op.value):  # type: ignore[union-attr]
                self.eat_op(op.value)
                return op
        raise ValueError(
            f'Invalid or no Operation found in line: "{self.line}". Valid Operations '
            f"codes are {', '.join([op.value for op in Operation])}"
        )

    def eat_op(self, op: str) -> None:
        """Remove the leading operation code ``op`` from ``self.line``."""
        line = self.line
        # Drop exactly one occurrence of the code.  str.lstrip(op) would treat
        # ``op`` as a set of characters and could swallow more of the line.
        if line.startswith(op):  # type: ignore[union-attr]
            self.line = line[len(op) :]  # type: ignore[index]

    def nodeid(
        self, bnode_context: Optional[_BNodeContextType] = None
    ) -> Union[te.Literal[False], BNode, URIRef]:
        """Parse ``_:label`` into ``BNode(label)``; return ``False`` if none starts here.

        ``bnode_context`` is accepted for interface compatibility and not used.
        """
        if self.peek("_"):
            return BNode(self.eat(r_nodeid).group(1))
        return False

    def labeled_bnode(self):
        """Parse ``<_:label>`` into ``BNode(label)``; return ``False`` if none starts here.

        :raises ParseError: if the bracketed text is not a valid blank-node label
        """
        if self.peek("<_"):
            plain_uri = self.eat(r_uriref).group(1)
            match = r_nodeid.match(plain_uri)
            if match is None:
                # Report malformed input as a parse error rather than failing
                # with AttributeError on a missing match.
                raise ParseError("Invalid blank node label: <%s>" % plain_uri)
            return BNode(match.group(1))
        return False
|
||||
@@ -0,0 +1,651 @@
|
||||
"""
|
||||
An RDF/XML parser for RDFLib
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
|
||||
from urllib.parse import urldefrag, urljoin
|
||||
from xml.sax import handler, make_parser, xmlreader
|
||||
from xml.sax.handler import ErrorHandler
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.exceptions import Error, ParserError
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, is_ncname
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.plugins.parsers.RDFVOC import RDFVOC
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# from xml.sax.expatreader import ExpatLocator
|
||||
from xml.sax.xmlreader import AttributesImpl, Locator
|
||||
|
||||
from rdflib.graph import _ObjectType, _SubjectType, _TripleType
|
||||
|
||||
__all__ = ["create_parser", "BagID", "ElementHandler", "RDFXMLHandler", "RDFXMLParser"]
|
||||
|
||||
# Alias of RDFVOC; used below in this module to look terms up by name.
RDFNS = RDFVOC

# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
# A mapping from unqualified terms to their qualified version.
UNQUALIFIED = {
    "about": RDFVOC.about,
    "ID": RDFVOC.ID,
    "type": RDFVOC.type,
    "resource": RDFVOC.resource,
    "parseType": RDFVOC.parseType,
}

# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
CORE_SYNTAX_TERMS = [
    RDFVOC.RDF,
    RDFVOC.ID,
    RDFVOC.about,
    RDFVOC.parseType,
    RDFVOC.resource,
    RDFVOC.nodeID,
    RDFVOC.datatype,
]

# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDFVOC.Description, RDFVOC.li]

# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
OLD_TERMS = [
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"),
]

# NOTE(review): presumably the terms that may not be used as node element
# names, and the attributes that identify a node element - confirm against
# the node-element handler.
NODE_ELEMENT_EXCEPTIONS = (
    CORE_SYNTAX_TERMS
    + [
        RDFVOC.li,
    ]
    + OLD_TERMS
)
NODE_ELEMENT_ATTRIBUTES = [RDFVOC.ID, RDFVOC.nodeID, RDFVOC.about]

# NOTE(review): presumably the property-element counterparts of the two
# lists above - confirm against the property-element handler.
PROPERTY_ELEMENT_EXCEPTIONS = (
    CORE_SYNTAX_TERMS
    + [
        RDFVOC.Description,
    ]
    + OLD_TERMS
)
PROPERTY_ATTRIBUTE_EXCEPTIONS = (
    CORE_SYNTAX_TERMS + [RDFVOC.Description, RDFVOC.li] + OLD_TERMS
)
PROPERTY_ELEMENT_ATTRIBUTES = [RDFVOC.ID, RDFVOC.resource, RDFVOC.nodeID]

# The XML namespace and the (namespace, local name) keys under which
# RDFXMLHandler.startElementNS looks up xml:base and xml:lang.
XMLNS = "http://www.w3.org/XML/1998/namespace"
BASE = (XMLNS, "base")
LANG = (XMLNS, "lang")
|
||||
|
||||
|
||||
class BagID(URIRef):
    """URIRef that also hands out successive ``rdf:_n`` membership properties."""

    __slots__ = ["li"]

    def __init__(self, val):
        # Call the inherited initialiser without arguments: forwarding ``val``
        # ends up in ``object.__init__``, which rejects extra arguments when
        # ``__init__`` is overridden.
        # NOTE(review): assumes no base class between URIRef and str defines
        # its own __init__ - confirm.
        super().__init__()
        # Index of the most recently issued rdf:_n property.
        self.li = 0

    def next_li(self):
        """Advance the counter and return the term ``RDFNS["_<n>"]``."""
        self.li += 1
        # type error: Type expected within [...]
        return RDFNS["_%s" % self.li]  # type: ignore[misc]
|
||||
|
||||
|
||||
class ElementHandler:
    """Per-element parsing state kept on the RDFXMLHandler stack.

    ``start``, ``char`` and ``end`` hold the callbacks invoked for the
    element's start tag, character data and end tag; the remaining slots
    carry values collected while the element is processed.
    """

    __slots__ = [
        "start",
        "char",
        "end",
        "li",
        "id",
        "base",
        "subject",
        "predicate",
        "object",
        "list",
        "language",
        "datatype",
        "declared",
        "data",
    ]

    def __init__(self):
        self.start = None
        self.char = None
        self.end = None
        self.li = 0
        self.id = None
        self.base = None
        self.subject = None
        # Initialise every declared slot, so reading ``predicate`` before it
        # has been assigned yields None instead of raising AttributeError.
        self.predicate = None
        self.object = None
        self.list = None
        self.language = None
        self.datatype = None
        self.declared = None
        self.data = None

    def next_li(self):
        """Advance the ``rdf:li`` counter and return the term ``RDFVOC["_<n>"]``."""
        self.li += 1
        return RDFVOC["_%s" % self.li]
|
||||
|
||||
|
||||
class RDFXMLHandler(handler.ContentHandler):
|
||||
def __init__(self, store: Graph):
    """Attach the handler to ``store`` and put it into its initial state."""
    self.preserve_bnode_ids = False
    self.store = store
    self.reset()
|
||||
|
||||
def reset(self) -> None:
    """Reinitialise the element stack, ID/bnode tables and namespace contexts."""

    def _ignore_end(name, qname):
        # The document-level handler has nothing to do on an end tag.
        return None

    root = ElementHandler()
    root.start = self.document_element_start
    root.end = _ignore_end

    # Index 0 is a None placeholder; the root handler sits above it.
    self.stack: List[Optional[ElementHandler]] = [None, root]
    self.ids: Dict[str, int] = {}  # remember IDs we have already seen
    self.bnode: Dict[str, Identifier] = {}
    # contains uri -> prefix dicts
    self._ns_contexts: List[Dict[str, Optional[str]]] = [{}]
    self._current_context: Dict[str, Optional[str]] = self._ns_contexts[-1]
|
||||
|
||||
# ContentHandler methods
|
||||
|
||||
def setDocumentLocator(self, locator: Locator):
    """Store ``locator``; startElementNS and error read it as ``self.locator``."""
    self.locator = locator
|
||||
|
||||
def startDocument(self) -> None:
    """Do nothing at the start of the document."""
    pass
|
||||
|
||||
def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None:
    """Record a namespace declaration and bind it on the store.

    A copy of the current uri -> prefix map is pushed before the new
    declaration is added.  The store binding is made with ``override=False``.
    """
    snapshot = self._current_context.copy()
    self._ns_contexts.append(snapshot)
    self._current_context[namespace] = prefix
    self.store.bind(prefix, namespace or "", override=False)
|
||||
|
||||
def endPrefixMapping(self, prefix: Optional[str]) -> None:
    """Make the most recently pushed namespace map current and pop it."""
    self._current_context = self._ns_contexts.pop()
|
||||
|
||||
def startElementNS(
    self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
) -> None:
    """Push a handler for the next level, resolve xml:base / xml:lang, dispatch.

    The base IRI comes from ``xml:base`` (resolved against the parent's base
    or the document's public/system id), else from the parent, else from the
    document id.  The language is inherited from the parent when the element
    carries no ``xml:lang``.
    """
    self.stack.append(ElementHandler())
    element = self.current
    enclosing = self.parent

    # type error: No overlaod for "get" of "AttributesImpl" mactches tuple (str, str)
    base = attrs.get(BASE, None)  # type: ignore[call-overload, unused-ignore]
    if base is None:
        if enclosing:
            base = enclosing.base
        if base is None:
            doc_id = self.locator.getPublicId() or self.locator.getSystemId()
            if doc_id:
                base = urldefrag(doc_id)[0]
    else:
        base = urldefrag(base)[0]
        if enclosing and enclosing.base:
            base = urljoin(enclosing.base, base)
        else:
            doc_id = self.locator.getPublicId() or self.locator.getSystemId()
            if doc_id:
                base = urljoin(doc_id, base)
    element.base = base

    # type error: No overlaod for "get" of "AttributesImpl" mactches tuple (str, str)
    language = attrs.get(LANG, None)  # type: ignore[call-overload, unused-ignore]
    if language is None and enclosing:
        language = enclosing.language
    element.language = language

    element.start(name, qname, attrs)
|
||||
|
||||
def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None:
    """Run the current handler's ``end`` callback, then pop the stack."""
    on_end = self.current.end
    on_end(name, qname)
    self.stack.pop()
|
||||
|
||||
def characters(self, content: str) -> None:
    """Pass character data to the current handler's ``char`` callback, if set."""
    on_chars = self.current.char
    if not on_chars:
        return
    on_chars(content)
|
||||
|
||||
def ignorableWhitespace(self, content) -> None:
    """Ignore the reported whitespace."""
    pass
|
||||
|
||||
def processingInstruction(self, target, data) -> None:
    """Ignore processing instructions."""
    pass
|
||||
|
||||
def add_reified(self, sid: Identifier, spo: _TripleType):
    """Add the four reification triples describing ``spo`` with subject ``sid``."""
    subj, pred, obj = spo
    described = (
        (RDF.type, RDF.Statement),
        (RDF.subject, subj),
        (RDF.predicate, pred),
        (RDF.object, obj),
    )
    for prop, value in described:
        self.store.add((sid, prop, value))
|
||||
|
||||
def error(self, message: str) -> NoReturn:
    """Raise ``ParserError`` with ``message`` prefixed by "system-id:line:column: "."""
    where = self.locator
    position = (
        where.getSystemId(),
        where.getLineNumber(),
        where.getColumnNumber(),
    )
    prefix = "%s:%s:%s: " % position
    raise ParserError(prefix + message)
|
||||
|
||||
def get_current(self) -> Optional[ElementHandler]:
    """Return the entry second from the top of ``self.stack``."""
    return self.stack[-2]

# Create a read only property called current so that self.current
# give the current element handler.
current = property(get_current)
|
||||
|
||||
def get_next(self) -> Optional[ElementHandler]:
    """Return the entry on top of ``self.stack``."""
    return self.stack[-1]

# Create a read only property that gives the element handler to be
# used for the next element.
next = property(get_next)
|
||||
|
||||
def get_parent(self) -> Optional[ElementHandler]:
    """Return the entry third from the top of ``self.stack``.

    This can be the ``None`` placeholder that ``reset`` puts at the bottom
    of the stack.
    """
    return self.stack[-3]

# Create a read only property that gives the current parent
# element handler
parent = property(get_parent)
|
||||
|
||||
def absolutize(self, uri: str) -> URIRef:
    """Resolve ``uri`` against the current element's base and return a URIRef.

    A trailing ``#`` on ``uri`` is put back if joining dropped it.
    """
    joined = urljoin(self.current.base, uri, allow_fragments=True)
    lost_hash = uri and uri[-1] == "#" and joined[-1] != "#"
    if lost_hash:
        joined = "%s#" % joined
    return URIRef(joined)
|
||||
|
||||
def convert(
    self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
) -> Tuple[URIRef, Dict[URIRef, str]]:
    """Convert a SAX element name and its attributes to URIRef form.

    The ``(namespace, localname)`` pair becomes a single URIRef (just the
    local name when the namespace is ``None``). Attribute names are handled
    the same way, then filtered: names that start with the XML namespace or
    with "xml" (case-insensitive) are dropped, names listed in
    ``UNQUALIFIED`` are mapped into the RDF namespace, and everything else
    is kept as a URIRef.

    Returns the element URIRef and a dict mapping attribute URIRefs to their
    string values.
    """
    element_uri = URIRef(name[1] if name[0] is None else "".join(name))  # type: ignore[arg-type]

    converted: Dict[URIRef, str] = {}
    for key, value in attrs.items():
        # mypy believes key[0] can never be None; SAX does report None for
        # attributes without a namespace.
        if key[0] is None:
            att = key[1]  # type: ignore[unreachable, unused-ignore]
        else:
            att = "".join(key)

        if att.startswith(XMLNS) or att[0:3].lower() == "xml":
            continue
        if att in UNQUALIFIED:
            # if not RDFNS[att] in atts:
            converted[RDFNS[att]] = value  # type: ignore[misc, valid-type]
        else:
            converted[URIRef(att)] = value
    return element_uri, converted
|
||||
|
||||
def document_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Handle the start tag of the document (root) element.

    An ``rdf:RDF`` root is only a wrapper: its children are treated as node
    elements. Any other root element is itself processed as a node element.
    """
    if not (name[0] and URIRef("".join(name)) == RDFVOC.RDF):
        self.node_element_start(name, qname, attrs)
        # self.current.end = self.node_element_end
        # TODO... set end to something that sets start such that
        # another element will cause error
        return

    upcoming = self.next
    upcoming.start = self.node_element_start
    upcoming.end = self.node_element_end
|
||||
|
||||
def node_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Handle the start tag of an RDF/XML node element.

    Determines the subject of the node from ``rdf:ID``, ``rdf:nodeID`` or
    ``rdf:about`` (a fresh blank node when none is given), adds an
    ``rdf:type`` triple for typed node elements (anything other than
    ``rdf:Description``), adds one triple per property attribute, and stores
    the subject on the current element handler.

    Errors are reported through ``self.error`` for: a forbidden node element
    URI, more than one of ``rdf:ID``/``rdf:about``/``rdf:nodeID``, an
    ``rdf:ID``/``rdf:nodeID`` value that is not an NCName, a repeated
    ``rdf:ID``, a forbidden property attribute URI, and a literal that
    cannot be constructed.
    """
    # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]")
    name, atts = self.convert(name, qname, attrs)  # type: ignore[assignment]
    current = self.current
    absolutize = self.absolutize

    # Child elements of a node element are handled as property elements.
    upcoming = self.next
    upcoming.start = self.property_element_start
    upcoming.end = self.property_element_end

    if name in NODE_ELEMENT_EXCEPTIONS:
        # type error: Not all arguments converted during string formatting
        self.error("Invalid node element URI: %s" % name)  # type: ignore[str-format]

    # The three identifying attributes are mutually exclusive; check that
    # once up front rather than in every branch below.
    has_id = RDFVOC.ID in atts
    has_node_id = RDFVOC.nodeID in atts
    has_about = RDFVOC.about in atts
    if has_id + has_node_id + has_about > 1:
        self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")

    subject: _SubjectType
    if has_id:
        rdf_id = atts[RDFVOC.ID]
        if not is_ncname(rdf_id):
            self.error("rdf:ID value is not a valid NCName: %s" % rdf_id)
        subject = absolutize("#%s" % rdf_id)
        if subject in self.ids:
            self.error("two elements cannot use the same ID: '%s'" % subject)
        self.ids[subject] = 1  # IDs can only appear once within a document
    elif has_node_id:
        node_id = atts[RDFVOC.nodeID]
        if not is_ncname(node_id):
            self.error("rdf:nodeID value is not a valid NCName: %s" % node_id)
        if self.preserve_bnode_ids is False:
            # Map each document-local label to one fresh blank node.
            if node_id in self.bnode:
                subject = self.bnode[node_id]
            else:
                subject = BNode()
                self.bnode[node_id] = subject
        else:
            subject = BNode(node_id)
    elif has_about:
        subject = absolutize(atts[RDFVOC.about])
    else:
        subject = BNode()

    if name != RDFVOC.Description:  # S1
        # error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
        self.store.add((subject, RDF.type, absolutize(name)))  # type: ignore[arg-type]

    value: _ObjectType
    language = current.language
    rdf_ns = str(RDFNS)  # loop-invariant
    for att in atts:
        if att.startswith(rdf_ns):
            if att == RDF.type:  # S2
                self.store.add((subject, RDF.type, absolutize(atts[RDF.type])))
                continue
            if att in NODE_ELEMENT_ATTRIBUTES:
                continue
            if att in PROPERTY_ATTRIBUTE_EXCEPTIONS:  # S3
                self.error("Invalid property attribute URI: %s" % att)
                # type error: Statement is unreachable
                continue  # type: ignore[unreachable]  # for when error does not throw an exception

        # Every remaining attribute is a property attribute with a plain
        # literal value in the element's language.
        predicate = absolutize(att)
        try:
            value = Literal(atts[att], language)
        except Error as e:
            # type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str"
            self.error(e.msg)  # type: ignore[arg-type]
        self.store.add((subject, predicate, value))

    current.subject = subject
|
||||
|
||||
def node_element_end(self, name: Tuple[str, str], qname) -> None:
    """Handle the end tag of a node element.

    Repeated node elements are only allowed at top level: if the parent
    already has an object and the current handler is not ``self.stack[2]``,
    an error is reported. Otherwise the node's subject becomes the parent's
    object.
    """
    parent = self.parent
    node = self.current

    if parent.object and node != self.stack[2]:
        qualified = "".join(name)
        self.error("Repeat node-elements inside property elements: %s" % qualified)

    parent.object = node.subject
|
||||
|
||||
def property_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Handle the start tag of an RDF/XML property element.

    Sets the predicate and optional reification id on the current element
    handler, then decides what the object of the property will be from
    ``rdf:resource``, ``rdf:nodeID``, ``rdf:parseType`` or, failing those,
    from property attributes / element content. The handlers for the next
    nested element are installed accordingly.

    On return ``current.object`` is either the object decided here or
    ``None``; in the latter case ``current.data`` is set to ``""`` so that
    character data is accumulated into a literal.

    Errors are reported through ``self.error`` for: a forbidden property
    element URI, non-NCName ``rdf:ID``/``rdf:nodeID`` values,
    ``rdf:resource`` combined with ``rdf:nodeID``, attributes other than
    ``rdf:ID`` next to ``rdf:parseType``, and forbidden property attribute
    URIs.
    """
    # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]")
    name, atts = self.convert(name, qname, attrs)  # type: ignore[assignment]
    current = self.current
    absolutize = self.absolutize

    upcoming = self.next
    obj: Optional[_ObjectType] = None
    current.data = None
    current.list = None
    rdf_ns = str(RDFNS)

    # --- predicate -------------------------------------------------------
    # type error: "Tuple[str, str]" has no attribute "startswith"
    if not name.startswith(rdf_ns):  # type: ignore[attr-defined]
        # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
        current.predicate = absolutize(name)  # type: ignore[arg-type]
    elif name == RDFVOC.li:
        current.predicate = current.next_li()
    elif name in PROPERTY_ELEMENT_EXCEPTIONS:
        # type error: Not all arguments converted during string formatting
        self.error("Invalid property element URI: %s" % name)  # type: ignore[str-format]
    else:
        # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str"
        current.predicate = absolutize(name)  # type: ignore[arg-type]

    # --- reification id --------------------------------------------------
    rdf_id = atts.get(RDFVOC.ID, None)
    if rdf_id is not None:
        if not is_ncname(rdf_id):
            self.error("rdf:ID value is not a valid NCName: %s" % rdf_id)
        current.id = absolutize("#%s" % rdf_id)
    else:
        current.id = None

    # --- object ----------------------------------------------------------
    resource = atts.get(RDFVOC.resource, None)
    node_id = atts.get(RDFVOC.nodeID, None)
    parse_type = atts.get(RDFVOC.parseType, None)
    if resource is not None and node_id is not None:
        self.error("Property element cannot have both rdf:nodeID and rdf:resource")
    if resource is not None:
        obj = absolutize(resource)
        upcoming.start = self.node_element_start
        upcoming.end = self.node_element_end
    elif node_id is not None:
        if not is_ncname(node_id):
            self.error("rdf:nodeID value is not a valid NCName: %s" % node_id)
        if self.preserve_bnode_ids is False:
            # Map each document-local label to one fresh blank node.
            if node_id in self.bnode:
                obj = self.bnode[node_id]
            else:
                obj = BNode()
                self.bnode[node_id] = obj
        else:
            obj = BNode(node_id)
        upcoming.start = self.node_element_start
        upcoming.end = self.node_element_end
    else:
        if parse_type is not None:
            for att in atts:
                if att != RDFVOC.parseType and att != RDFVOC.ID:
                    self.error("Property attr '%s' not allowed here" % att)
            if parse_type == "Resource":
                current.subject = obj = BNode()
                current.char = self.property_element_char
                upcoming.start = self.property_element_start
                upcoming.end = self.property_element_end
            elif parse_type == "Collection":
                current.char = None
                obj = current.list = RDF.nil  # BNode()
                # self.parent.subject
                upcoming.start = self.node_element_start
                upcoming.end = self.list_node_element_end
            else:  # if parse_type=="Literal":
                # All other values are treated as Literal
                # See: http://www.w3.org/TR/rdf-syntax-grammar/
                # parseTypeOtherPropertyElt
                obj = Literal("", datatype=RDFVOC.XMLLiteral)
                current.char = self.literal_element_char
                current.declared = {XMLNS: "xml"}
                upcoming.start = self.literal_element_start
                upcoming.char = self.literal_element_char
                upcoming.end = self.literal_element_end
            current.object = obj
            return
        else:
            obj = None
            current.char = self.property_element_char
            upcoming.start = self.node_element_start
            upcoming.end = self.node_element_end

    # --- datatype / property attributes -----------------------------------
    datatype = current.datatype = atts.get(RDFVOC.datatype, None)
    language = current.language
    if datatype is not None:
        # TODO: check that there are no atts other than datatype and id
        # NOTE(review): the absolutized value is not stored anywhere;
        # current.datatype keeps the raw attribute value. The call is kept
        # because it can still raise on a malformed URI - confirm whether
        # current.datatype was meant to receive the absolutized form.
        datatype = absolutize(datatype)
    else:
        for att in atts:
            if not att.startswith(rdf_ns):
                predicate = absolutize(att)
            elif att in PROPERTY_ELEMENT_ATTRIBUTES:
                continue
            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
                self.error("""Invalid property attribute URI: %s""" % att)
            else:
                predicate = absolutize(att)

            o: _ObjectType
            if att == RDF.type:
                o = URIRef(atts[att])
            else:
                # datatype is always None in this branch, so the literal
                # keeps the element's language.
                o = Literal(atts[att], language, datatype)

            # Property attributes describe an implicit blank node object.
            if obj is None:
                obj = BNode()
            self.store.add((obj, predicate, o))

    if obj is None:
        current.data = ""
        current.object = None
    else:
        current.data = None
        current.object = obj
|
||||
|
||||
def property_element_char(self, data: str) -> None:
    """Append character data to the current handler's ``data`` buffer.

    Data is ignored while the buffer is ``None``.
    """
    element = self.current
    if element.data is None:
        return
    element.data += data
|
||||
|
||||
def property_element_end(self, name: Tuple[str, str], qname) -> None:
    """Handle the end tag of a property element.

    Collected character data becomes a literal object when no object was
    set (a datatype suppresses the language tag). When the next handler's
    ``end`` is ``list_node_element_end`` and the object is not ``rdf:nil``,
    the list is terminated with an ``rdf:rest rdf:nil`` triple. A non-``None``
    object is then added as ``(parent subject, predicate, object)``, plus
    its reification when the element carried an id. Finally the current
    handler's subject is cleared.
    """
    current = self.current

    if current.data is not None and current.object is None:
        lang = None if current.datatype is not None else current.language
        current.object = Literal(current.data, lang, current.datatype)
        current.data = None

    closes_collection = self.next.end == self.list_node_element_end
    if closes_collection and current.object != RDF.nil:
        self.store.add((current.list, RDF.rest, RDF.nil))

    value = current.object
    if value is not None:
        statement = (self.parent.subject, current.predicate, value)
        self.store.add(statement)
        if current.id is not None:
            self.add_reified(current.id, statement)

    current.subject = None
|
||||
|
||||
def list_node_element_end(self, name: Tuple[str, str], qname) -> None:
    """Handle the end tag of a node element inside a ``parseType="Collection"``.

    A new blank node is created as list cell for the node's subject. For the
    first item (parent list still ``rdf:nil``) the cell becomes the parent's
    object and the parent's ``char`` callback is cleared; for later items
    the cell is linked from the previous cell via ``rdf:rest``. In both
    cases the parent's ``list`` ends up pointing at the new cell.
    """
    parent = self.parent
    item = self.current.subject
    cell = BNode()

    if parent.list == RDF.nil:
        # Removed between 20030123 and 20030905
        # self.store.add((list, RDF.type, LIST))
        parent.list = cell
        self.store.add((cell, RDF.first, item))
        parent.object = cell
        parent.char = None
        return

    # Removed between 20030123 and 20030905
    # self.store.add((list, RDF.type, LIST))
    self.store.add((parent.list, RDF.rest, cell))
    self.store.add((cell, RDF.first, item))
    parent.list = cell
|
||||
|
||||
def literal_element_start(
    self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
    """Serialise the start tag of an element nested in an XML literal.

    The opening tag text (element name, any namespace declaration not yet
    declared by an ancestor, and all attributes) is stored in
    ``current.object``. The handlers for nested elements are set to the
    literal handlers again, and ``current.declared`` starts as a copy of the
    parent's declared namespaces.
    """
    current = self.current
    upcoming = self.next
    upcoming.start = self.literal_element_start
    upcoming.char = self.literal_element_char
    upcoming.end = self.literal_element_end
    declared = current.declared = self.parent.declared.copy()

    namespace, local = name[0], name[1]
    pieces = []
    if namespace:
        prefix = self._current_context[namespace]
        pieces.append("<%s:%s" % (prefix, local) if prefix else "<%s" % local)
        if namespace not in declared:
            declared[namespace] = prefix
            if prefix:
                pieces.append(' xmlns:%s="%s"' % (prefix, namespace))
            else:
                pieces.append(' xmlns="%s"' % namespace)
    else:
        pieces.append("<%s" % local)

    for attr_name, value in attrs.items():
        attr_ns = attr_name[0]
        if attr_ns:
            if attr_ns not in declared:
                declared[attr_ns] = self._current_context[attr_ns]
            rendered = declared[attr_ns] + ":" + attr_name[1]
        else:
            rendered = attr_name[1]
        pieces.append(" %s=%s" % (rendered, quoteattr(value)))

    pieces.append(">")
    current.object = "".join(pieces)
|
||||
|
||||
def literal_element_char(self, data: str) -> None:
    """Append XML-escaped character data to the current literal text."""
    element = self.current
    element.object = element.object + escape(data)
|
||||
|
||||
def literal_element_end(self, name: Tuple[str, str], qname) -> None:
    """Close an element nested in an XML literal.

    The end tag is built from the prefix registered for the element's
    namespace in ``self._current_context`` (no prefix when the namespace or
    the prefix is empty), and the current element's text plus that end tag
    is appended to the parent's literal text.
    """
    namespace, local = name[0], name[1]
    prefix = self._current_context[namespace] if namespace else None
    tag = "%s:%s" % (prefix, local) if prefix else local
    closing = "</%s>" % tag

    parent = self.parent
    parent.object += self.current.object + closing
|
||||
|
||||
|
||||
def create_parser(target: InputSource, store: Graph) -> xmlreader.XMLReader:
    """Build a namespace-aware SAX reader that feeds an ``RDFXMLHandler``.

    The handler writes into ``store`` and receives ``target`` as its document
    locator. A default ``ErrorHandler`` is installed on the reader.
    """
    reader = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        reader.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")  # type: ignore[attr-defined]
    except AttributeError:
        pass  # Not present in Jython (at least)
    reader.setFeature(handler.feature_namespaces, 1)

    content_handler = RDFXMLHandler(store)
    # type error: Argument 1 to "setDocumentLocator" of "RDFXMLHandler" has incompatible type "InputSource"; expected "Locator"
    content_handler.setDocumentLocator(target)  # type: ignore[arg-type]
    # rdfxml.setDocumentLocator(_Locator(self.url, self.parser))

    reader.setContentHandler(content_handler)
    reader.setErrorHandler(ErrorHandler())
    return reader
|
||||
|
||||
|
||||
class RDFXMLParser(Parser):
    """RDFLib parser plugin that reads RDF/XML through a SAX reader."""

    def __init__(self):
        pass

    def parse(self, source: InputSource, sink: Graph, **args: Any) -> None:
        """Parse ``source`` as RDF/XML into ``sink``.

        The optional keyword argument ``preserve_bnode_ids`` is copied onto
        the content handler when it is given and not ``None``.
        """
        reader = create_parser(source, sink)
        self._parser = reader

        keep_ids = args.get("preserve_bnode_ids", None)
        if keep_ids is not None:
            # type error: ContentHandler has no attribute "preserve_bnode_ids"
            reader.getContentHandler().preserve_bnode_ids = keep_ids  # type: ignore[attr-defined, unused-ignore]

        # # We're only using it once now
        # content_handler.reset()
        # self._parser.reset()
        reader.parse(source)
|
||||
@@ -0,0 +1,177 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, MutableSequence
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.parser import InputSource, Parser
|
||||
|
||||
from .notation3 import RDFSink, SinkParser
|
||||
|
||||
|
||||
def becauseSubGraph(*args, **kwargs):  # noqa: N802
    """Do nothing; accepts any arguments and returns ``None``."""
    return None
|
||||
|
||||
|
||||
class TrigSinkParser(SinkParser):
    """``SinkParser`` extended with TriG graph blocks."""

    def directiveOrStatement(self, argstr: str, h: int) -> int:  # noqa: N802
        """Parse one graph block, directive or statement starting at ``h``.

        Tries, in order: a graph block, a SPARQL-style directive, a
        directive and a statement. The results of the last two are passed
        through ``checkDot``.

        Returns the index after the parsed construct, or a negative number
        at EOF or when nothing matched.
        """
        i = self.skipSpace(argstr, h)
        if i < 0:
            return i  # EOF

        j = self.graph(argstr, i)
        if j >= 0:
            return j

        j = self.sparqlDirective(argstr, i)
        if j >= 0:
            return j

        j = self.directive(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        j = self.statement(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        return j

    def labelOrSubject(  # noqa: N802
        self, argstr: str, i: int, res: MutableSequence[Any]
    ) -> int:
        """Parse a graph label: a URI reference or an empty ``[]`` blank node.

        The parsed term is appended to ``res``.

        Returns the index after the label, a negative number at EOF, or -1
        when no label is found at ``i``.
        """
        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        i = j

        j = self.uri_ref2(argstr, i, res)
        if j >= 0:
            return j

        if argstr[i] == "[":
            j = self.skipSpace(argstr, i + 1)
            if j < 0:
                self.BadSyntax(argstr, i, "Expected ] got EOF")
            if argstr[j] == "]":
                res.append(self.blankNode())
                return j + 1
        return -1

    def graph(self, argstr: str, i: int) -> int:
        """
        Parse trig graph, i.e.

           <urn:graphname> = { .. triples .. }

        return -1 if it doesn't look like a graph-decl
        raise Exception if it looks like a graph, but isn't.
        """
        need_graphid = False
        j = self.sparqlTok("GRAPH", argstr, i)  # optional GRAPH keyword
        if j >= 0:
            i = j
            need_graphid = True

        r: MutableSequence[Any] = []
        j = self.labelOrSubject(argstr, i, r)
        if j >= 0:
            graph_id = r[0]
            i = j
        elif need_graphid:
            self.BadSyntax(argstr, i, "GRAPH keyword must be followed by graph name")
        else:
            graph_id = self._store.graph.identifier  # hack

        j = self.skipSpace(argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i, "EOF found when expected graph")

        if argstr[j : j + 1] == "=":  # optional = for legacy support
            i = self.skipSpace(argstr, j + 1)
            if i < 0:
                # Report the position of the "=", not the negative EOF
                # sentinel returned by skipSpace.
                self.BadSyntax(argstr, j, "EOF found when expecting '{'")
        else:
            i = j

        if argstr[i : i + 1] != "{":
            return -1  # the node wasn't part of a graph

        j = i + 1

        if self._context is not None:
            self.BadSyntax(argstr, i, "Nested graphs are not allowed")

        # Switch the parser to the new graph for the duration of the block.
        oldParentContext = self._parentContext  # noqa: N806
        self._parentContext = self._context
        reason2 = self._reason2
        self._reason2 = becauseSubGraph
        # type error: Incompatible types in assignment (expression has type "Graph", variable has type "Optional[Formula]")
        self._context = self._store.newGraph(graph_id)  # type: ignore[assignment]

        while True:
            i = self.skipSpace(argstr, j)
            if i < 0:
                # Report where scanning started, not the negative EOF
                # sentinel returned by skipSpace.
                self.BadSyntax(argstr, j, "needed '}', found end.")

            if argstr[i : i + 1] == "}":
                j = i + 1
                break

            j = self.directiveOrStatement(argstr, i)
            if j < 0:
                self.BadSyntax(argstr, i, "expected statement or '}'")

        # Restore the state saved before entering the block.
        self._context = self._parentContext
        self._reason2 = reason2
        self._parentContext = oldParentContext
        # res.append(subj.close())    # No use until closed
        return j
|
||||
|
||||
|
||||
class TrigParser(Parser):
    """
    An RDFLib parser for TriG

    """

    def __init__(self):
        pass

    def parse(self, source: InputSource, graph: Graph, encoding: str = "utf-8") -> None:
        """Parse TriG from ``source`` into the store behind ``graph``.

        ``graph`` becomes the default context of a ``ConjunctiveGraph`` built
        on the same store. Prefix bindings collected while parsing are bound
        on that conjunctive graph afterwards.

        :param source: input to read; its character stream is preferred,
            with the byte stream as fallback.
        :param graph: target graph; its store must be context-aware.
        :param encoding: must be ``None`` or ``"utf-8"``.
        :raises Exception: if another encoding is requested.
        :raises AssertionError: if the store is not context-aware.
        """
        if encoding not in [None, "utf-8"]:
            # Previously a tuple was %-formatted here, which raised a
            # TypeError instead of this message.
            raise Exception(
                "TriG files are always utf-8 encoded, I was passed: %s" % encoding
            )

        # we're currently being handed a Graph, not a ConjunctiveGraph
        assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

        conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
        conj_graph.default_context = graph  # TODO: CG __init__ should have a
        # default_context arg
        # TODO: update N3Processor so that it can use conj_graph as the sink
        conj_graph.namespace_manager = graph.namespace_manager

        sink = RDFSink(conj_graph)

        baseURI = conj_graph.absolutize(  # noqa: N806
            source.getPublicId() or source.getSystemId() or ""
        )
        p = TrigSinkParser(sink, baseURI=baseURI, turtle=True)

        stream = source.getCharacterStream()  # try to get str stream first
        if not stream:
            # fallback to get the bytes stream
            stream = source.getByteStream()
        p.loadStream(stream)

        for prefix, namespace in p._bindings.items():
            conj_graph.bind(prefix, namespace)

        # return ???
|
||||
@@ -0,0 +1,296 @@
|
||||
"""
|
||||
A TriX parser for RDFLib
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
|
||||
from xml.sax import handler, make_parser
|
||||
from xml.sax.handler import ErrorHandler
|
||||
|
||||
from rdflib.exceptions import ParserError
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.parser import InputSource, Parser
|
||||
from rdflib.store import Store
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# from xml.sax.expatreader import ExpatLocator
|
||||
from xml.sax.xmlreader import AttributesImpl, Locator, XMLReader
|
||||
|
||||
__all__ = ["create_parser", "TriXHandler", "TriXParser"]
|
||||
|
||||
|
||||
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
|
||||
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
|
||||
class TriXHandler(handler.ContentHandler):
    """A SAX handler for TriX. See http://sw.nokia.com/trix/

    Parsing is driven by ``self.state``:

    * 0 - outside the ``TriX`` root element
    * 1 - inside ``TriX``, outside any ``graph``
    * 2 - inside a ``graph``
    * 3 - inside the ``uri``/``id`` element that names the graph
    * 4 - inside a ``triple``
    """

    lang: Optional[str]
    datatype: Optional[str]

    def __init__(self, store: Store):
        super().__init__()
        self.store = store
        self.preserve_bnode_ids = False
        self.reset()

    def reset(self) -> None:
        """Reset all per-document parsing state."""
        self.bnode: Dict[str, BNode] = {}
        self.graph: Optional[Graph] = None
        self.triple: Optional[List[Identifier]] = None
        self.state = 0
        self.lang = None
        self.datatype = None
        # Text of the element being read; initialised here so that
        # characters() never hits a missing attribute.
        self.chars = ""

    # ContentHandler methods

    def setDocumentLocator(self, locator: Locator):
        self.locator = locator

    def startDocument(self) -> None:
        pass

    def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None:
        pass

    def endPrefixMapping(self, prefix: Optional[str]) -> None:
        pass

    @staticmethod
    def _xml_lang(attrs: AttributesImpl) -> Optional[str]:
        """Return the ``xml:lang`` attribute value, or ``None`` if absent."""
        try:
            # type error: Argument 1 to "getValue" of "AttributesImpl" has incompatible type "Tuple[str, str]"; expected "str"
            return attrs.getValue((str(XMLNS), "lang"))  # type: ignore[arg-type, unused-ignore]
        except KeyError:
            # language not required - ignore
            return None

    def startElementNS(
        self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl
    ) -> None:
        """Validate the element against the current state and advance it.

        Reports an error for elements outside the TriX namespace, unknown
        elements, elements that are not allowed in the current state, and a
        ``typedLiteral`` without ``datatype`` attribute. The character
        buffer is cleared at the end of every start tag.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS)
            )

        local = name[1]
        if local.lower() == "trix":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif local == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif local in ("uri", "id"):
            if self.state == 2:
                # the context uri / id
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected %s element" % local)

        elif local == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif local == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.datatype = None
                self.lang = self._xml_lang(attrs)
                try:
                    self.datatype = attrs.getValueByQName("datatype")  # type: ignore[arg-type, unused-ignore]
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif local == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.datatype = None
                self.lang = self._xml_lang(attrs)
            else:
                self.error("Unexpected plainLiteral element")

        else:
            self.error("Unknown element %s in TriX namespace" % local)

        self.chars = ""

    def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None:
        """Turn the finished element into a graph name, term or triple.

        ``uri``/``id`` either name the graph (state 3) or contribute a term
        to the open triple (state 4); literals contribute a term; ``triple``
        adds the three collected terms to the current graph; ``graph`` and
        ``TriX`` step the state back out.
        """
        if TYPE_CHECKING:
            assert self.triple is not None
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS)
            )

        local = name[1]
        if local == "uri":
            if self.state == 3:
                self.graph = Graph(
                    store=self.store, identifier=URIRef(self.chars.strip())
                )
                self.state = 2
            elif self.state == 4:
                self.triple.append(URIRef(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    + "happen if the SAX parser ensures XML syntax correctness"
                )

        elif local == "id":
            if self.state == 3:
                self.graph = Graph(
                    self.store, identifier=self.get_bnode(self.chars.strip())
                )
                self.state = 2
            elif self.state == 4:
                self.triple.append(self.get_bnode(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never "
                    + "happen if the SAX parser ensures XML syntax correctness"
                )

        elif local == "plainLiteral" or local == "typedLiteral":
            if self.state == 4:
                self.triple.append(
                    Literal(self.chars, lang=self.lang, datatype=self.datatype)
                )
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    + "ensures XML syntax correctness"
                )

        elif local == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error(
                        "Triple has wrong length, got %d elements: %s"
                        % (len(self.triple), self.triple)
                    )
                # type error: Item "None" of "Optional[Graph]" has no attribute "add"
                # type error: Argument 1 to "add" of "Graph" has incompatible type "List[Identifier]"; expected "Tuple[Node, Node, Node]"
                self.graph.add(self.triple)  # type: ignore[union-attr, arg-type]
                # self.store.store.add(self.triple,context=self.graph)
                # self.store.addN([self.triple+[self.graph]])
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser "
                    + "ensures XML syntax correctness"
                )

        elif local == "graph":
            self.graph = None
            self.state = 1

        elif local.lower() == "trix":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label: str) -> BNode:
        """Return the blank node for the document-local ``label``.

        With ``preserve_bnode_ids`` set, a ``BNode(label)`` is created on
        every call. Otherwise the node created for the first occurrence of
        ``label`` is cached in ``self.bnode`` and reused.
        """
        if self.preserve_bnode_ids:
            return BNode(label)
        try:
            return self.bnode[label]
        except KeyError:
            # NOTE(review): the cached node is also created as BNode(label),
            # so the document label is kept even when preserve_bnode_ids is
            # false - confirm whether a fresh BNode() was intended.
            bn = self.bnode[label] = BNode(label)
            return bn

    def characters(self, content: str) -> None:
        self.chars += content

    def ignorableWhitespace(self, content) -> None:
        pass

    def processingInstruction(self, target, data) -> None:
        pass

    def error(self, message: str) -> NoReturn:
        """Raise a ``ParserError`` prefixed with ``systemId:line:column: ``."""
        locator = self.locator
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber(),
        )
        raise ParserError(info + message)
|
||||
|
||||
|
||||
def create_parser(store: Store) -> XMLReader:
    """Build a namespace-aware SAX reader that feeds a ``TriXHandler``.

    The handler writes into ``store``; a default ``ErrorHandler`` is
    installed on the reader.
    """
    reader = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        # type error: "XMLReader" has no attribute "start_namespace_decl"
        reader.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")  # type: ignore[attr-defined]
    except AttributeError:
        pass  # Not present in Jython (at least)
    reader.setFeature(handler.feature_namespaces, 1)

    content_handler = TriXHandler(store)
    reader.setContentHandler(content_handler)
    reader.setErrorHandler(ErrorHandler())
    return reader
|
||||
|
||||
|
||||
class TriXParser(Parser):
    """A parser for TriX. See http://sw.nokia.com/trix/"""

    def __init__(self):
        pass

    def parse(self, source: InputSource, sink: Graph, **args: Any) -> None:
        """Parse ``source`` as TriX into the store behind ``sink``.

        The store must be context-aware. The optional keyword argument
        ``preserve_bnode_ids`` is copied onto the content handler when it is
        given and not ``None``.
        """
        assert (
            sink.store.context_aware
        ), "TriXParser must be given a context aware store."

        reader = create_parser(sink.store)
        self._parser = reader

        keep_ids = args.get("preserve_bnode_ids", None)
        if keep_ids is not None:
            # type error: ContentHandler has no attribute "preserve_bnode_ids"
            reader.getContentHandler().preserve_bnode_ids = keep_ids  # type: ignore[attr-defined, unused-ignore]

        # We're only using it once now
        # content_handler.reset()
        # self._parser.reset()
        reader.parse(source)
|
||||
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
HextuplesSerializer RDF graph serializer for RDFLib.
|
||||
See <https://github.com/ontola/hextuples> for details about the format.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from typing import IO, Any, Callable, List, Optional, Type, Union, cast
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, URIRef
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
__all__ = ["HextuplesSerializer"]
|
||||
|
||||
|
||||
class HextuplesSerializer(Serializer):
    """
    Serializes RDF graphs to the Hextuples format.

    Every statement is written as one JSON array on its own line, holding
    ``[subject, predicate, value, datatype, language, graph]``.
    """

    contexts: List[Union[Graph, IdentifiedNode]]
    dumps: Callable

    def __new__(cls, store: Union[Graph, Dataset, ConjunctiveGraph]):
        # The constant JSON fields are stored on the class. With orjson they
        # are pre-encoded fragments so they are not re-escaped for every line.
        if _HAS_ORJSON:
            cls.str_local_id: Union[str, Any] = orjson.Fragment(b'"localId"')
            cls.str_global_id: Union[str, Any] = orjson.Fragment(b'"globalId"')
            cls.empty: Union[str, Any] = orjson.Fragment(b'""')
            cls.lang_str: Union[str, Any] = orjson.Fragment(
                b'"' + RDF.langString.encode("utf-8") + b'"'
            )
            cls.xsd_string: Union[str, Any] = orjson.Fragment(
                b'"' + XSD.string.encode("utf-8") + b'"'
            )
        else:
            cls.str_local_id = "localId"
            cls.str_global_id = "globalId"
            cls.empty = ""
            cls.lang_str = f"{RDF.langString}"
            cls.xsd_string = f"{XSD.string}"
        # Zero-argument super() resolves relative to this class; the previous
        # ``super(cls, cls)`` resolved relative to the runtime class and so
        # re-entered this method when a subclass was instantiated.
        return super().__new__(cls)

    def __init__(self, store: Union[Graph, Dataset, ConjunctiveGraph]):
        """Record which graphs of *store* have to be written.

        For a Dataset or ConjunctiveGraph all contexts are collected and a
        truthy ``default_context`` is remembered and appended to the list;
        for a plain Graph the graph itself is the only context.
        """
        self.default_context: Optional[Union[Graph, IdentifiedNode]]
        self.graph_type: Union[Type[Graph], Type[Dataset], Type[ConjunctiveGraph]]
        if isinstance(store, (Dataset, ConjunctiveGraph)):
            self.graph_type = (
                Dataset if isinstance(store, Dataset) else ConjunctiveGraph
            )
            self.contexts = list(store.contexts())
            if store.default_context:
                self.default_context = store.default_context
                self.contexts.append(store.default_context)
            else:
                self.default_context = None
        else:
            self.graph_type = Graph
            self.contexts = [store]
            self.default_context = None

        Serializer.__init__(self, store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs: Any,
    ) -> None:
        """Write every serializable triple of every context to *stream*.

        Args:
            stream: Binary stream receiving one JSON line per triple.
            base: Ignored; a warning is issued when it is not ``None``.
            encoding: Ignored; a warning is issued for anything other than
                ``None`` or ``"utf-8"``.
            **kwargs: Not read.

        Raises:
            Exception: If ``self.store.formula_aware`` is ``True``.
        """
        if base is not None:
            warnings.warn(
                "base has no meaning for Hextuples serialization. "
                "I will ignore this value"
            )

        if encoding not in [None, "utf-8"]:
            warnings.warn(
                f"Hextuples files are always utf-8 encoded. "
                f"I was passed: {encoding}, "
                "but I'm still going to use utf-8 anyway!"
            )

        if self.store.formula_aware is True:
            raise Exception(
                "Hextuple serialization can't (yet) handle formula-aware stores"
            )
        context: Union[Graph, IdentifiedNode]
        context_str: Union[bytes, str]
        for context in self.contexts:
            # Generate context string just once, because it doesn't change
            # for every triple in this context
            if self.graph_type is Graph:
                context_str = cast(Union[str, bytes], self.empty)
            elif _HAS_ORJSON:
                context_str = cast(
                    Union[str, bytes],
                    orjson.Fragment('"' + self._context_str(context) + '"'),
                )
            else:
                context_str = self._context_str(context)

            for triple in context:
                hl = self._hex_line(triple, context_str)
                if hl is not None:
                    # orjson already yields bytes; json yields str.
                    stream.write(hl if _HAS_ORJSON else hl.encode())

    def _hex_line(self, triple, context_str: Union[bytes, str]):
        """Return the encoded Hextuples line for *triple*, or ``None``.

        ``None`` is returned when the subject is not a URIRef/BNode or the
        object is not a URIRef, BNode or Literal (e.g. a QuotedGraph).
        The result is ``bytes`` with orjson and ``str`` otherwise.
        """
        subj, pred, obj = triple[0], triple[1], triple[2]

        # exclude QuotedGraph and other objects in subject position
        if not isinstance(subj, (URIRef, BNode)):
            return None

        if isinstance(obj, URIRef):
            value = self._iri_or_bn(obj)
            datatype = self.str_global_id
            language = self.empty
        elif isinstance(obj, BNode):
            value = self._iri_or_bn(obj)
            datatype = self.str_local_id
            language = self.empty
        elif isinstance(obj, Literal):
            value = obj
            if obj.datatype is not None:
                datatype = f"{obj.datatype}"
            elif obj.language is not None:
                datatype = self.lang_str
            else:
                datatype = self.xsd_string
            language = (
                f"{obj.language}" if obj.language is not None else self.empty
            )
        else:
            return None  # can't handle non URI, BN or Literal Object (QuotedGraph)

        line_list = [
            self._iri_or_bn(subj),
            pred,
            value,
            datatype,
            language,
            context_str,
        ]
        outline: Union[str, bytes]
        if _HAS_ORJSON:
            outline = orjson.dumps(line_list, option=orjson.OPT_APPEND_NEWLINE)
        else:
            outline = json.dumps(line_list) + "\n"
        return outline

    def _iri_or_bn(self, i_):
        """Return the IRI text of a URIRef, the N3 form of a BNode, else ``None``."""
        if isinstance(i_, URIRef):
            return f"{i_}"
        elif isinstance(i_, BNode):
            return f"{i_.n3()}"
        else:
            return None

    def _context_str(self, context: Union[Graph, IdentifiedNode]) -> str:
        """Return the graph name to emit for *context*.

        The empty string is returned for the dataset default graph id, for
        the remembered default context, and whenever a plain Graph is being
        serialized. Otherwise the identifier is returned as IRI text for a
        URIRef and in N3 form for anything else.
        """
        context_identifier: IdentifiedNode = (
            context.identifier if isinstance(context, Graph) else context
        )
        if context_identifier == DATASET_DEFAULT_GRAPH_ID:
            return ""
        if self.default_context is not None:
            if (
                isinstance(self.default_context, IdentifiedNode)
                and context_identifier == self.default_context
            ):
                return ""
            elif (
                isinstance(self.default_context, Graph)
                and context_identifier == self.default_context.identifier
            ):
                return ""
        if self.graph_type is Graph:
            # Only emit a context name when serializing a Dataset or ConjunctiveGraph
            return ""
        return (
            f"{context_identifier}"
            if isinstance(context_identifier, URIRef)
            else context_identifier.n3()
        )
|
||||
@@ -0,0 +1,433 @@
|
||||
"""
|
||||
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> from rdflib import Graph
|
||||
>>> testrdf = '''
|
||||
... @prefix dc: <http://purl.org/dc/terms/> .
|
||||
... <http://example.org/about>
|
||||
... dc:title "Someone's Homepage"@en .
|
||||
... '''
|
||||
|
||||
>>> g = Graph().parse(data=testrdf, format='n3')
|
||||
|
||||
>>> print(g.serialize(format='json-ld', indent=2))
|
||||
[
|
||||
{
|
||||
"@id": "http://example.org/about",
|
||||
"http://purl.org/dc/terms/title": [
|
||||
{
|
||||
"@language": "en",
|
||||
"@value": "Someone's Homepage"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
"""
|
||||
|
||||
# From: https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/serializer.py
|
||||
|
||||
# NOTE: This code writes the entire JSON object into memory before serialising,
|
||||
# but we should consider streaming the output to deal with arbitrarily large
|
||||
# graphs.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Dict, List, Optional
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph, _ObjectType
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, URIRef
|
||||
|
||||
from ..shared.jsonld.context import UNDEF, Context
|
||||
from ..shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB
|
||||
from ..shared.jsonld.util import _HAS_ORJSON, json, orjson
|
||||
|
||||
__all__ = ["JsonLDSerializer", "from_rdf"]
|
||||
|
||||
|
||||
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
|
||||
|
||||
|
||||
class JsonLDSerializer(Serializer):
    """Serializer that writes the store as a JSON-LD document."""

    def __init__(self, store: Graph):
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Convert the store with :func:`from_rdf` and write it to *stream*.

        Args:
            stream: Binary stream receiving the encoded document.
            base: Passed to :func:`from_rdf`.
            encoding: Output encoding; falls back to ``"utf-8"``. Anything
                other than ``"utf-8"``/``"utf-16"`` triggers a warning.
            **kwargs: Recognised keys are ``context``, ``use_native_types``,
                ``use_rdf_type``, ``auto_compact`` (conversion) and
                ``indent``, ``separators``, ``sort_keys``, ``ensure_ascii``
                (JSON formatting). With orjson, any non-``None`` ``indent``
                means two-space indentation, ``separators`` is not used and
                ``ensure_ascii`` only produces a warning.
        """
        encoding = encoding or "utf-8"
        if encoding not in ("utf-8", "utf-16"):
            warnings.warn(
                "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
            )

        context_data = kwargs.get("context")
        # This used to be wrapped in a one-element tuple by a stray trailing
        # comma; a tuple is always truthy, so the option could not be disabled.
        use_native_types = kwargs.get("use_native_types", False)
        use_rdf_type = kwargs.get("use_rdf_type", False)
        auto_compact = kwargs.get("auto_compact", False)

        indent = kwargs.get("indent", 2)
        separators = kwargs.get("separators", (",", ": "))
        sort_keys = kwargs.get("sort_keys", True)
        ensure_ascii = kwargs.get("ensure_ascii", False)

        obj = from_rdf(
            self.store,
            context_data,
            base,
            use_native_types,
            use_rdf_type,
            auto_compact=auto_compact,
        )
        if _HAS_ORJSON:
            option: int = orjson.OPT_NON_STR_KEYS
            if indent is not None:
                option |= orjson.OPT_INDENT_2
            if sort_keys:
                option |= orjson.OPT_SORT_KEYS
            if ensure_ascii:
                warnings.warn("Cannot use ensure_ascii with orjson")
            data_bytes = orjson.dumps(obj, option=option)
            if encoding != "utf-8":
                # orjson always produces UTF-8; transcode so both code paths
                # honour the requested encoding.
                data_bytes = data_bytes.decode("utf-8").encode(encoding, "replace")
            stream.write(data_bytes)
        else:
            data = json.dumps(
                obj,
                indent=indent,
                separators=separators,
                sort_keys=sort_keys,
                ensure_ascii=ensure_ascii,
            )
            stream.write(data.encode(encoding, "replace"))
|
||||
|
||||
|
||||
def from_rdf(
    graph,
    context_data=None,
    base=None,
    use_native_types=False,
    use_rdf_type=False,
    auto_compact=False,
    startnode=None,
    index=False,
):
    """Convert *graph* into a JSON-LD compatible Python structure.

    Args:
        graph: Graph to convert.
        context_data: A ``Context`` instance or the data to build one from.
        base: Base passed to ``Context`` when one is constructed here.
        use_native_types: Forwarded to ``Converter``.
        use_rdf_type: Forwarded to ``Converter``.
        auto_compact: When true and no context data is given, a context is
            derived from the graph's prefixed namespace bindings (the XML
            namespace is left out).
        startnode: Not used yet.
        index: Not used yet.

    Returns:
        The converter result. When the context is active, a list result is
        wrapped under the graph key and the context data is attached under
        the context key.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = {
            pfx: str(ns)
            for pfx, ns in graph.namespaces()
            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
        }

    if not isinstance(context_data, Context):
        context = Context(context_data, base=base)
    else:
        context = context_data
        context_data = context.to_dict()

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
|
||||
|
||||
|
||||
class Converter:
    """Builds JSON-LD style Python structures from RDF graphs."""

    def __init__(self, context: Context, use_native_types: bool, use_rdf_type: bool):
        self.context = context
        # Native types are always used when the context is active.
        self.use_native_types = context.active or use_native_types
        self.use_rdf_type = use_rdf_type

    def convert(self, graph: Graph):
        """Convert *graph* (and, if context aware, its named graphs).

        Returns a list of node objects, or a single object when exactly one
        object was produced and the context is active.
        """
        # TODO: bug in rdflib dataset parsing (nquads et al):
        # plain triples end up in separate unnamed graphs (rdflib issue #436)
        if graph.context_aware:
            # type error: "Graph" has no attribute "contexts"
            all_contexts = list(graph.contexts())  # type: ignore[attr-defined]
            has_dataset_default_id = any(
                c.identifier == DATASET_DEFAULT_GRAPH_ID for c in all_contexts
            )
            if (
                has_dataset_default_id
                # # type error: "Graph" has no attribute "contexts"
                and graph.default_context.identifier == DATASET_DEFAULT_GRAPH_ID  # type: ignore[attr-defined]
            ):
                default_graph = graph.default_context  # type: ignore[attr-defined]
            else:
                default_graph = Graph()
            graphs = [default_graph]
            default_graph_id = default_graph.identifier

            for g in all_contexts:
                if g in graphs:
                    continue
                if isinstance(g.identifier, URIRef):
                    graphs.append(g)
                else:
                    # Graphs without an IRI name are merged into the default graph.
                    default_graph += g
        else:
            graphs = [graph]
            default_graph_id = graph.identifier

        context = self.context

        objs: List[Any] = []
        for g in graphs:
            obj = {}
            graphname = None

            if isinstance(g.identifier, URIRef):
                if g.identifier != default_graph_id:
                    graphname = context.shrink_iri(g.identifier)
                    obj[context.id_key] = graphname

            nodes = self.from_graph(g)

            if not graphname and len(nodes) == 1:
                obj.update(nodes[0])
            else:
                if not nodes:
                    continue
                obj[context.graph_key] = nodes

            if objs and objs[0].get(context.get_key(ID)) == graphname:
                objs[0].update(obj)
            else:
                objs.append(obj)

        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
            default = objs[0]
            items = default.get(context.graph_key)
            if len(default) == 1 and items:
                objs = items
        elif len(objs) == 1 and self.context.active:
            objs = objs[0]

        return objs

    def from_graph(self, graph: Graph):
        """Return the list of top-level node objects for *graph*."""
        nodemap: Dict[Any, Any] = {}

        for s in set(graph.subjects()):
            ## only iri:s and unreferenced (rest will be promoted to top if needed)
            if isinstance(s, URIRef) or (
                isinstance(s, BNode) and not any(graph.subjects(None, s))
            ):
                self.process_subject(graph, s, nodemap)

        return list(nodemap.values())

    def process_subject(self, graph: Graph, s: IdentifiedNode, nodemap):
        """Create the node object for *s* and register it in *nodemap*.

        Returns the new node, or ``None`` when *s* is already in *nodemap*.
        """
        if isinstance(s, URIRef):
            node_id = self.context.shrink_iri(s)
        elif isinstance(s, BNode):
            node_id = s.n3()
        else:
            # This does not seem right, this probably should be an error.
            node_id = None

        # used_as_object = any(graph.subjects(None, s))
        if node_id in nodemap:
            return None

        node = {}
        node[self.context.id_key] = node_id
        # Registered before descending so nested references to s stop here.
        nodemap[node_id] = node

        for p, o in graph.predicate_objects(s):
            # type error: Argument 3 to "add_to_node" of "Converter" has incompatible type "Node"; expected "IdentifiedNode"
            # type error: Argument 4 to "add_to_node" of "Converter" has incompatible type "Node"; expected "Identifier"
            self.add_to_node(graph, s, p, o, node, nodemap)  # type: ignore[arg-type]

        return node

    def add_to_node(
        self,
        graph: Graph,
        s: IdentifiedNode,
        p: IdentifiedNode,
        o: Identifier,
        s_node: Dict[str, Any],
        nodemap,
    ):
        """Add the value of statement ``(s, p, o)`` to the node dict *s_node*.

        A context term matching the predicate (and the object's datatype,
        language or container form) decides the key and any coercion;
        without one the key comes from ``context.to_symbol``. Repeated
        values for the same key are collected into a list.
        """
        context = self.context
        # Only literals carry a language tag; defined up front so the
        # language-container branch below can always read it.
        language = None

        if isinstance(o, Literal):
            datatype = str(o.datatype) if o.datatype else None
            language = o.language
            term = context.find_term(str(p), datatype, language=language)
        else:
            containers = [LIST, None] if graph.value(o, RDF.first) else [None]
            for container in containers:
                for coercion in (ID, VOCAB, UNDEF):
                    # type error: Argument 2 to "find_term" of "Context" has incompatible type "object"; expected "Union[str, Defined, None]"
                    # type error: Argument 3 to "find_term" of "Context" has incompatible type "Optional[str]"; expected "Union[Defined, str]"
                    term = context.find_term(str(p), coercion, container)  # type: ignore[arg-type]
                    if term:
                        break
                if term:
                    break

        node = None
        use_set = not context.active

        if term:
            p_key = term.name

            if term.type:
                node = self.type_coerce(o, term.type)
            # type error: "Identifier" has no attribute "language"
            elif term.language and o.language == term.language:  # type: ignore[attr-defined]
                node = str(o)
            # type error: Right operand of "and" is never evaluated
            elif context.language and (term.language is None and o.language is None):  # type: ignore[unreachable]
                node = str(o)  # type: ignore[unreachable]

            if LIST in term.container:
                node = [
                    self.type_coerce(v, term.type)
                    or self.to_raw_value(graph, s, v, nodemap)
                    for v in self.to_collection(graph, o)
                ]
            elif LANG in term.container and language:
                value = s_node.setdefault(p_key, {})
                values = value.get(language)
                node = str(o)
                # Compare against None so that an existing empty string is
                # kept instead of being overwritten.
                if values is not None or SET in term.container:
                    if not isinstance(values, list):
                        # Nothing stored yet: start empty rather than [None].
                        value[language] = values = (
                            [] if values is None else [values]
                        )
                    values.append(node)
                else:
                    value[language] = node
                return
            elif SET in term.container:
                use_set = True

        else:
            p_key = context.to_symbol(p)
            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
            key_term = context.terms.get(p_key)
            if key_term and (key_term.type or key_term.container):
                p_key = p
            if not term and p == RDF.type and not self.use_rdf_type:
                if isinstance(o, URIRef):
                    node = context.to_symbol(o)
                p_key = context.type_key

        if node is None:
            node = self.to_raw_value(graph, s, o, nodemap)

        value = s_node.get(p_key)
        # ``is not None`` rather than truthiness: a previously stored 0,
        # False or "" is a real value and must not be replaced.
        if value is not None:
            if not isinstance(value, list):
                value = [value]
            value.append(node)
        elif use_set:
            value = [node]
        else:
            value = node
        s_node[p_key] = value

    def type_coerce(self, o: Identifier, coerce_type: str):
        """Return *o* coerced according to *coerce_type*, or ``None``.

        ``ID`` shrinks URIRefs and renders BNodes in N3 (other objects are
        returned unchanged); ``VOCAB`` symbolises URIRefs; a literal whose
        datatype equals *coerce_type* is returned as is.
        """
        if coerce_type == ID:
            if isinstance(o, URIRef):
                return self.context.shrink_iri(o)
            elif isinstance(o, BNode):
                return o.n3()
            else:
                return o
        elif coerce_type == VOCAB and isinstance(o, URIRef):
            return self.context.to_symbol(o)
        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
            return o
        else:
            return None

    def to_raw_value(
        self, graph: Graph, s: IdentifiedNode, o: Identifier, nodemap: Dict[str, Any]
    ):
        """Return the uncoerced JSON-LD value for object *o*.

        Collections become a list object, BNodes and URIRefs become id
        references (a BNode is also processed as a subject), and literals
        become a plain value or a value object depending on datatype,
        language and whether the context is active. Any other object kind
        yields ``None``.
        """
        context = self.context
        coll = self.to_collection(graph, o)
        if coll is not None:
            # The collection walked above is reused instead of walking it again.
            coll = [self.to_raw_value(graph, s, lo, nodemap) for lo in coll]
            return {context.list_key: coll}
        elif isinstance(o, BNode):
            embed = (
                False  # TODO: self.context.active or using startnode and only one ref
            )
            onode = self.process_subject(graph, o, nodemap)
            if onode:
                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
                    return onode
                else:
                    nodemap[onode[context.id_key]] = onode
            return {context.id_key: o.n3()}
        elif isinstance(o, URIRef):
            # TODO: embed if o != startnode (else reverse)
            return {context.id_key: context.shrink_iri(o)}
        elif isinstance(o, Literal):
            # TODO: if compact
            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
            if native:
                v = o.toPython()
            else:
                v = str(o)
            if o.datatype:
                if native and self.context.active:
                    return v
                return {
                    context.type_key: context.to_symbol(o.datatype),
                    context.value_key: v,
                }
            elif o.language and o.language != context.language:
                return {context.lang_key: o.language, context.value_key: v}
            # type error: Right operand of "and" is never evaluated
            elif not context.active or (context.language and not o.language):  # type: ignore[unreachable]
                return {context.value_key: v}
            else:
                return v
        return None

    def to_collection(self, graph: Graph, l_: Identifier):
        """Return the members of the RDF list starting at *l_*, or ``None``.

        ``None`` is returned when *l_* is not a list head, when a list node
        is a URIRef other than ``rdf:nil``, when a node has statements other
        than ``rdf:first``/``rdf:rest``/``rdf:type rdf:List``, or when the
        chain loops back on itself.
        """
        if l_ != RDF.nil and not graph.value(l_, RDF.first):
            return None
        list_nodes: List[Optional[_ObjectType]] = []
        chain = {l_}
        while l_:
            if l_ == RDF.nil:
                return list_nodes
            if isinstance(l_, URIRef):
                return None
            first, rest = None, None
            for p, o in graph.predicate_objects(l_):
                if not first and p == RDF.first:
                    first = o
                elif not rest and p == RDF.rest:
                    rest = o
                elif p != RDF.type or o != RDF.List:
                    return None
            list_nodes.append(first)
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Identifier")
            l_ = rest  # type: ignore[assignment]
            if l_ in chain:
                # Cycle guard.
                return None
            chain.add(l_)
|
||||
+326
@@ -0,0 +1,326 @@
|
||||
"""
|
||||
LongTurtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
|
||||
This variant, longturtle as opposed to just turtle, makes some small format changes
|
||||
to turtle - the original turtle serializer. It:
|
||||
|
||||
* uses PREFIX instead of @prefix
|
||||
* uses BASE instead of @base
|
||||
* adds a new line at RDF.type, or 'a'
|
||||
* adds a newline and an indent for all triples with more than one object (object list)
|
||||
* adds a new line and ';' for the last triple in a set with '.'
|
||||
on the start of the next line
|
||||
* uses the default encoding (encode()) instead of "latin-1"
|
||||
|
||||
- Nicholas Car, 2023
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.compare import to_canonical_graph
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
from .turtle import RecursiveSerializer
|
||||
|
||||
__all__ = ["LongTurtleSerializer"]
|
||||
|
||||
SUBJECT = 0
|
||||
VERB = 1
|
||||
OBJECT = 2
|
||||
|
||||
_GEN_QNAME_FOR_DT = False
|
||||
_SPACIOUS_OUTPUT = False
|
||||
|
||||
|
||||
class LongTurtleSerializer(RecursiveSerializer):
    """Turtle serializer variant producing the "longturtle" layout."""

    short_name = "longturtle"
    # NOTE(review): the indent unit is reproduced exactly as it appears in
    # this view; confirm its width against the checked-in file.
    indentString = " "

    def __init__(self, store):
        self._ns_rewrite = {}
        # Canonicalise, dump to N-Triples, sort the lines and re-parse, so
        # the graph handed to the base class is built from sorted statements.
        canonical = to_canonical_graph(store)
        nt_lines = sorted(
            canonical.serialize(format="application/n-triples").split("\n")
        )
        graph = Graph()
        graph.parse(
            data="\n".join(nt_lines), format="application/n-triples", skolemize=True
        )
        graph = graph.de_skolemize()
        super().__init__(graph)
        self.keywords = {RDF.type: "a"}
        self.reset()
        self.stream = None
        self._spacious: bool = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """Register *prefix* for *namespace* and return the prefix actually used."""
        # Turtle does not support prefixes that start with _
        # if they occur in the graph, rewrite to p_blah
        # this is more complicated since we need to make sure p_blah
        # does not already exist. And we register namespaces as we go, i.e.
        # we may first see a triple with prefix _9 - rewrite it to p_9
        # and then later find a triple with a "real" p_9 prefix

        # so we need to keep track of ns rewrites we made so far.

        needs_rewrite = (prefix > "" and prefix[0] == "_") or (
            self.namespaces.get(prefix, namespace) != namespace
        )
        if needs_rewrite and prefix not in self._ns_rewrite:
            candidate = "p" + prefix
            while candidate in self.namespaces:
                candidate = "p" + candidate
            self._ns_rewrite[prefix] = candidate

        prefix = self._ns_rewrite.get(prefix, prefix)

        super().addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Reset the base serializer state and this class's bookkeeping."""
        super().reset()
        self._shortNames = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write the graph to *stream*.

        Args:
            stream: Binary output stream.
            base: Base IRI; when ``None`` the store's base is used if set.
            encoding: Not read by this method.
            spacious: When not ``None``, overrides the spacious-output flag.
            **kwargs: Not read.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        ordered_subjects = self.orderSubjects()

        self.startDocument()

        for subject in ordered_subjects:
            if self.isDone(subject):
                continue
            # A newline follows every statement that was written.
            if self.statement(subject):
                self.write("\n")

        self.endDocument()

        self.base = None

    def preprocessTriple(self, triple):
        """Register the namespaces needed by *triple* and count BNode predicates."""
        super().preprocessTriple(triple)
        for position, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(position == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        predicate = triple[1]
        if isinstance(predicate, BNode):  # hmm - when is P ever a bnode?
            self._references[predicate] += 1

    def getQName(self, uri, gen_prefix=True):
        """Return ``prefix:local`` for *uri*, or ``None`` if it has no QName."""
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)
            if pfx is None:
                # nothing worked
                return None
            parts = (pfx, uri, "")

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return "%s:%s" % (prefix, local)

    def startDocument(self):
        """Write the BASE line (if any) and the sorted PREFIX lines."""
        self._started = True
        bindings = sorted(self.namespaces.items())

        if self.base:
            self.write(self.indent() + "BASE <%s>\n" % self.base)
        for prefix, uri in bindings:
            self.write(self.indent() + "PREFIX %s: <%s>\n" % (prefix, uri))
        if bindings and self._spacious:
            self.write("\n")

    def endDocument(self):
        """Write a trailing newline when spacious output is on."""
        if self._spacious:
            self.write("\n")

    def statement(self, subject):
        """Mark *subject* done and write it, trying the ``[]`` form first."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write *subject*, then its predicate list, then a closing ``.`` line."""
        self.write("\n" + self.indent())
        self.path(subject, SUBJECT)
        self.write("\n" + self.indent())
        self.predicateList(subject)
        self.write("\n.")
        return True

    def s_squared(self, subject):
        """Write an unreferenced BNode subject as ``[]``; return whether it applied."""
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write("\n" + self.indent() + "[]")
        self.predicateList(subject, newline=False)
        self.write("\n.")
        return True

    def path(self, node, position, newline=False):
        """Write *node*, nested when possible, otherwise as a label.

        Raises:
            Error: If neither form could serialize the node.
        """
        written = self.p_squared(node, position) or self.p_default(
            node, position, newline
        )
        if not written:
            raise Error("Cannot serialize node '%s'" % (node,))

    def p_default(self, node, position, newline=False):
        """Write the label of *node*, space-prefixed unless subject or newline."""
        if position != SUBJECT and not newline:
            self.write(" ")
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the textual form of *node* for the given position."""
        if node == RDF.nil:
            return "()"
        if position is VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
            )

        node = self.relativize(node)
        return self.getQName(node, position == VERB) or node.n3()

    def p_squared(
        self,
        node,
        position,
    ):
        """Write a BNode inline as a ``( ... )`` list or ``[ ... ]`` block.

        Returns ``False`` without writing when *node* is not a BNode, is
        already serialized, is referenced more than once, or is a subject.
        """
        if (
            not isinstance(node, BNode)
            or node in self._serialized
            or self._references[node] > 1
            or position == SUBJECT
        ):
            return False

        if self.isValidList(node):
            # this is a list
            self.depth += 2
            self.write(" (\n")
            self.depth -= 2
            self.doList(node)
            self.write("\n" + self.indent() + ")")
        else:
            # this is a Blank Node
            self.subjectDone(node)
            self.write("\n" + self.indent(1) + "[\n")
            self.depth += 1
            self.predicateList(node)
            self.depth -= 1
            self.write("\n" + self.indent(1) + "]")

        return True

    def isValidList(self, l_):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if self.store.value(l_, RDF.first) is None:
                return False
        except Exception:
            return False
        while l_:
            if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
                return False
            l_ = self.store.value(l_, RDF.rest)
        return True

    def doList(self, l_):
        """Write the members of the RDF list starting at *l_*, one per line."""
        position = 0
        while l_:
            item = self.store.value(l_, RDF.first)
            if item is not None:
                # Every member after the first starts on a new line.
                lead = "" if position == 0 else "\n"
                self.write(lead + self.indent(1))
                self.path(item, OBJECT, newline=True)
                self.subjectDone(l_)
            l_ = self.store.value(l_, RDF.rest)
            position += 1

    def predicateList(self, subject, newline=False):
        """Write all predicates of *subject*, each followed by its objects."""
        properties = self.buildPredicateHash(subject)
        ordered = self.sortProperties(properties)
        if len(ordered) == 0:
            return
        for idx, predicate in enumerate(ordered):
            if idx == 0:
                self.write(self.indent(1))
            else:
                self.write(" ;\n" + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])
        self.write(" ;")

    def verb(self, node, newline=False):
        """Write *node* in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write the comma-separated objects of one predicate."""
        count = len(objects)
        if count == 0:
            return
        # NOTE: the previous expression ``(count == 1) and 0 or 1`` always
        # evaluated to 1 because 0 is falsy; that effective value is kept so
        # the produced layout does not change.
        depthmod = 1
        self.depth += depthmod
        first_nl = False
        if count > 1:
            if isinstance(objects[0], BNode):
                self.write(" ")
            else:
                self.write("\n" + self.indent(1))
            first_nl = True
        self.path(objects[0], OBJECT, newline=first_nl)
        for obj in objects[1:]:
            self.write(" ,")
            if not isinstance(obj, BNode):
                self.write("\n" + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
|
||||
@@ -0,0 +1,91 @@
|
||||
"""
|
||||
Notation 3 (N3) RDF graph serializer for RDFLib.
|
||||
"""
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import OWL, Namespace
|
||||
from rdflib.plugins.serializers.turtle import OBJECT, SUBJECT, TurtleSerializer
|
||||
|
||||
__all__ = ["N3Serializer"]
|
||||
|
||||
SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
|
||||
|
||||
|
||||
class N3Serializer(TurtleSerializer):
    """Turtle serializer extended with N3 keywords and nested ``{ ... }`` graphs."""

    short_name = "n3"

    def __init__(self, store: Graph, parent=None):
        super().__init__(store)
        self.keywords.update({OWL.sameAs: "=", SWAP_LOG.implies: "=>"})
        # Serializer of the enclosing graph when this one writes a nested
        # graph; None otherwise.
        self.parent = parent

    def reset(self):
        super().reset()
        self._stores = {}

    def endDocument(self):  # noqa: N802
        # Only the outermost serializer finishes the document.
        if self.parent:
            return
        super().endDocument()

    def indent(self, modifier=0):
        """Return this serializer's indent followed by the parent's, if any."""
        own = super().indent(modifier)
        if self.parent is None:
            return own
        return own + self.parent.indent()  # modifier)

    def preprocessTriple(self, triple):  # noqa: N802
        """Preprocess *triple* and, recursively, any Graph used as a term."""
        super().preprocessTriple(triple)
        for position in (0, 1, 2):
            term = triple[position]
            if isinstance(term, Graph):
                for nested in term:
                    self.preprocessTriple(nested)

    def getQName(self, uri, gen_prefix=True):  # noqa: N802
        """Ask the parent serializer for a QName first, then fall back to own."""
        qname = None
        if self.parent is not None:
            qname = self.parent.getQName(uri, gen_prefix)
        if qname is None:
            qname = super().getQName(uri, gen_prefix)
        return qname

    def statement(self, subject):
        """Write *subject*'s statements; return ``False`` if it has none."""
        self.subjectDone(subject)
        properties = self.buildPredicateHash(subject)
        if len(properties) == 0:
            return False
        return self.s_clause(subject) or super().statement(subject)

    def path(self, node, position, newline=False):
        """Write *node* as a nested graph when it is one, else as Turtle."""
        if self.p_clause(node, position):
            return
        super().path(node, position, newline)

    def s_clause(self, subject):
        """Write a statement whose subject is a Graph; return whether it applied."""
        if not isinstance(subject, Graph):
            return False
        self.write("\n" + self.indent())
        self.p_clause(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def p_clause(self, node, position):
        """Write a Graph node as ``{ ... }`` using a child serializer.

        Returns ``False`` without writing when *node* is not a Graph.
        """
        if not isinstance(node, Graph):
            return False
        self.subjectDone(node)
        if position is OBJECT:
            self.write(" ")
        self.write("{")
        self.depth += 1
        child = N3Serializer(node, parent=self)
        # type error: Argument 1 to "serialize" of "TurtleSerializer" has incompatible type "Optional[IO[bytes]]"; expected "IO[bytes]"
        child.serialize(self.stream)  # type: ignore[arg-type]
        self.depth -= 1
        self.write(self.indent() + "}")
        return True
|
||||
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.nt import _quoteLiteral
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
__all__ = ["NQuadsSerializer"]
|
||||
|
||||
|
||||
class NQuadsSerializer(Serializer):
    """Writes every triple of every context of the store as one N-Quads row."""

    def __init__(self, store: Graph):
        # Reject stores without contexts before the base class is initialised.
        if not store.context_aware:
            raise Exception(
                "NQuads serialization only makes sense for context-aware stores!"
            )

        super().__init__(store)
        self.store: ConjunctiveGraph

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the store to ``stream``.

        ``base`` and a non-default ``encoding`` are not supported; each only
        triggers a warning. Output always uses ``self.encoding``.
        """
        if base is not None:
            warnings.warn("NQuadsSerializer does not support base.")
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "NQuadsSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        codec = self.encoding
        for graph in self.store.contexts():
            for statement in graph:
                row = _nq_row(statement, graph.identifier)
                stream.write(row.encode(codec, "replace"))
        # Trailing newline after the last row.
        stream.write(b"\n")
|
||||
|
||||
|
||||
def _nq_row(triple, context):
    """Format ``triple`` plus its ``context`` identifier as one N-Quads line.

    Literal objects are rendered with ``_quoteLiteral``; every other term
    uses its own ``n3()`` form.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    if isinstance(triple[2], Literal):
        object_text = _quoteLiteral(triple[2])
    else:
        object_text = triple[2].n3()
    return f"{subject_text} {predicate_text} {object_text} {context.n3()} .\n"
|
||||
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import warnings
|
||||
from typing import IO, TYPE_CHECKING, Any, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _TripleType
|
||||
|
||||
"""
|
||||
N-Triples RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
|
||||
format.
|
||||
"""
|
||||
|
||||
__all__ = ["NTSerializer"]
|
||||
|
||||
|
||||
class NTSerializer(Serializer):
    """
    Serializes RDF graphs to NTriples format.
    """

    def __init__(self, store: Graph):
        Serializer.__init__(self, store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs: Any,
    ) -> None:
        """Write one N-Triples row per triple of the store to ``stream``.

        ``base`` and any ``encoding`` other than the exact string ``"utf-8"``
        only produce a warning; rows are encoded with ``str.encode()``
        defaults.
        """
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        if encoding != "utf-8":
            warnings.warn(
                "NTSerializer always uses UTF-8 encoding. "
                f"Given encoding was: {encoding}"
            )

        for statement in self.store:
            row = _nt_row(statement)
            stream.write(row.encode())
|
||||
|
||||
|
||||
class NT11Serializer(NTSerializer):
    """
    Serializes RDF graphs to RDF 1.1 NTriples format.

    Behaves exactly like the ``nt`` serializer; output is UTF-8 encoded.
    """

    def __init__(self, store: Graph):
        # NTSerializer.__init__ only forwards to Serializer.__init__,
        # so the initialisation performed is unchanged.
        super().__init__(store)  # default to utf-8
|
||||
|
||||
|
||||
def _nt_row(triple: _TripleType) -> str:
    """Format ``triple`` as a single N-Triples line ending in `` .\\n``.

    A Literal object goes through ``_quoteLiteral``; any other object is
    rendered with its ``n3()`` method.
    """
    obj = triple[2]
    object_text = _quoteLiteral(obj) if isinstance(obj, Literal) else obj.n3()
    return f"{triple[0].n3()} {triple[1].n3()} {object_text} .\n"
|
||||
|
||||
|
||||
def _quoteLiteral(l_: Literal) -> str:  # noqa: N802
    """
    Render a literal as a quoted string with optional language or datatype.

    A simpler version of term.Literal.n3(). Raises ``Exception`` when the
    literal carries both a language and a datatype.
    """
    quoted = _quote_encode(l_)

    if not l_.language:
        if l_.datatype:
            return f"{quoted}^^<{l_.datatype}>"
        return quoted

    if l_.datatype:
        raise Exception("Literal has datatype AND language!")
    return f"{quoted}@{l_.language}"
|
||||
|
||||
|
||||
def _quote_encode(l_: str) -> str:
|
||||
return '"%s"' % l_.replace("\\", "\\\\").replace("\n", "\\n").replace(
|
||||
'"', '\\"'
|
||||
).replace("\r", "\\r")
|
||||
|
||||
|
||||
def _nt_unicode_error_resolver(
|
||||
err: UnicodeError,
|
||||
) -> Tuple[Union[str, bytes], int]:
|
||||
"""
|
||||
Do unicode char replaces as defined in https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings
|
||||
"""
|
||||
|
||||
def _replace_single(c):
|
||||
c = ord(c)
|
||||
fmt = "\\u%04X" if c <= 0xFFFF else "\\U%08X"
|
||||
return fmt % c
|
||||
|
||||
# type error: "UnicodeError" has no attribute "object"
|
||||
# type error: "UnicodeError" has no attribute "start"
|
||||
# type error: "UnicodeError" has no attribute "end"
|
||||
string = err.object[err.start : err.end] # type: ignore[attr-defined]
|
||||
# type error: "UnicodeError" has no attribute "end"
|
||||
return "".join(_replace_single(c) for c in string), err.end # type: ignore[attr-defined]
|
||||
|
||||
|
||||
codecs.register_error("_rdflib_nt_escape", _nt_unicode_error_resolver)
|
||||
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from rdflib import Dataset
|
||||
from rdflib.plugins.serializers.nquads import _nq_row
|
||||
from rdflib.plugins.serializers.nt import _nt_row
|
||||
from rdflib.serializer import Serializer
|
||||
|
||||
add_remove_methods = {"add": "A", "remove": "D"}
|
||||
|
||||
|
||||
class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Serialize the store to the given stream.
        :param stream: The stream to serialize to.
        :param base: Not supported; a warning is issued if given.
        :param encoding: Not supported; a warning is issued if it differs
            from the serializer's own encoding, which is always used.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
        - target: The target Dataset to compare against.
        NB: Only one of 'operation' or 'target' should be provided.
        - header_id: The header ID to use.
        - header_prev: The previous header ID to use.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_id = kwargs.get("header_id")
        header_prev = kwargs.get("header_prev")
        if not header_id:
            header_id = f"uuid:{uuid4()}"
        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # Compare the caller's encoding BEFORE overwriting it; otherwise the
        # comparison is against itself and the warning can never fire.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        def write_header():
            stream.write(f"H id <{header_id}> .\n".encode(encoding, "replace"))
            if header_prev:
                # Header rows end with " ." just like the id row above.
                stream.write(f"H prev <{header_prev}> .\n".encode(encoding, "replace"))
            stream.write("TX .\n".encode(encoding, "replace"))

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    # Re-fetch the context from this serializer's own store.
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    stream.write(
                        self._patch_row(triple, context.identifier, op_code).encode(
                            encoding, "replace"
                        )
                    )

        if operation:
            assert operation in add_remove_methods, f"Invalid operation: {operation}"
        elif target is None:
            # No operation specified and no target specified
            # Fall back to default operation of "add" to prevent a no-op
            operation = "add"
        write_header()
        if operation:
            operation_code = add_remove_methods.get(operation)
            write_triples(self.store.contexts(), operation_code)
        elif target is not None:
            # Identity check rather than truthiness: a target that evaluates
            # false (e.g. an empty one) must still be diffed, not ignored.
            to_add, to_remove = self._diff(target)
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        stream.write("TC .\n".encode(encoding, "replace"))

    def _diff(self, target):
        """Return ``(target - store, store - target)``: rows to add and to remove."""
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """Format one patch row: a triple row for the default context, else a quad row."""
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        else:
            return f"{operation} {_nq_row(triple, context_id)}"
|
||||
@@ -0,0 +1,391 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.dom.minidom
|
||||
from typing import IO, Any, Dict, Generator, Optional, Set, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.collection import Collection
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS, Namespace # , split_uri
|
||||
from rdflib.plugins.parsers.RDFVOC import RDFVOC
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, Node, URIRef
|
||||
from rdflib.util import first, more_than
|
||||
|
||||
from .xmlwriter import ESCAPE_ENTITIES
|
||||
|
||||
__all__ = ["fix", "XMLSerializer", "PrettyXMLSerializer"]
|
||||
|
||||
|
||||
class XMLSerializer(Serializer):
    """Writes the store as RDF/XML, one ``rdf:Description`` element per subject."""

    def __init__(self, store: Graph):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self) -> Generator[Tuple[str, URIRef], None, None]:
        """Yield ``(prefix, namespace)`` for every predicate namespace plus ``rdf``."""
        store = self.store
        nm = store.namespace_manager
        bindings: Dict[str, URIRef] = {}

        for predicate in set(store.predicates()):
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, name = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            bindings[prefix] = URIRef(namespace)

        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")  # noqa: N806

        if "rdf" in bindings:
            assert bindings["rdf"] == RDFNS
        else:
            bindings["rdf"] = RDFNS

        yield from bindings.items()

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the whole store to ``stream`` as an ``rdf:RDF`` document.

        :param stream: binary stream that receives the output.
        :param base: base IRI; falls back to the store's base when ``None``.
        :param encoding: ignored; ``self.encoding`` is always used.
        :param kwargs: ``xml_base`` overrides the ``xml:base`` attribute value.
        """
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base
        self.__stream = stream
        self.__serialized: Dict[Identifier, int] = {}
        encoding = self.encoding
        self.write = write = lambda uni: stream.write(uni.encode(encoding, "replace"))

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

        # startRDF
        write("<rdf:RDF\n")

        # If provided, write xml:base attribute for the RDF.
        # Attribute values go through quoteattr so that "&", "<" or quotes
        # inside an IRI cannot produce malformed XML.
        if "xml_base" in kwargs:
            write(" xml:base=%s\n" % quoteattr(str(kwargs["xml_base"])))
        elif self.base:
            write(" xml:base=%s\n" % quoteattr(str(self.base)))
        # TODO:
        # assert(
        #    namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
        bindings = sorted(self.__bindings())

        for prefix, namespace in bindings:
            if prefix:
                write(" xmlns:%s=%s\n" % (prefix, quoteattr(str(namespace))))
            else:
                write(" xmlns=%s\n" % quoteattr(str(namespace)))
        write(">\n")

        # write out triples by subject
        for subject in self.store.subjects():
            # type error: Argument 1 to "subject" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
            self.subject(subject, 1)  # type: ignore[arg-type]

        # endRDF
        write("</rdf:RDF>\n")

        # Drop the bookkeeping dict so that the memory can get garbage collected.
        # self.__serialized = None
        del self.__serialized

    def subject(self, subject: Identifier, depth: int = 1) -> None:
        """Write the description element for ``subject`` unless already written."""
        if subject not in self.__serialized:
            self.__serialized[subject] = 1

            if isinstance(subject, (BNode, URIRef)):
                write = self.write
                indent = " " * depth
                element_name = "rdf:Description"

                if isinstance(subject, BNode):
                    write('%s<%s rdf:nodeID="%s"' % (indent, element_name, subject))
                else:
                    uri = quoteattr(self.relativize(subject))
                    write("%s<%s rdf:about=%s" % (indent, element_name, uri))

                if (subject, None, None) in self.store:
                    write(">\n")

                    for predicate, object in self.store.predicate_objects(subject):
                        # type error: Argument 1 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                        # type error: Argument 2 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                        self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]
                    write("%s</%s>\n" % (indent, element_name))

                else:
                    write("/>\n")

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """Write one property element for ``predicate`` with value ``object``."""
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname_strict(predicate)

        if isinstance(object, Literal):
            attributes = ""

            if object.language:
                attributes += ' xml:lang="%s"' % object.language

            if object.datatype:
                # Escaped for the same reason as xml:base in serialize().
                attributes += " rdf:datatype=%s" % quoteattr(str(object.datatype))

            write(
                "%s<%s%s>%s</%s>\n"
                % (indent, qname, attributes, escape(object, ESCAPE_ENTITIES), qname)
            )
        else:
            if isinstance(object, BNode):
                write('%s<%s rdf:nodeID="%s"/>\n' % (indent, qname, object))
            else:
                write(
                    "%s<%s rdf:resource=%s/>\n"
                    % (indent, qname, quoteattr(self.relativize(object)))
                )
|
||||
|
||||
|
||||
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
|
||||
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
|
||||
OWL_NS = Namespace("http://www.w3.org/2002/07/owl#")
|
||||
|
||||
|
||||
# TODO:
|
||||
def fix(val: str) -> str:
    """Strip a leading ``_:`` from a nodeID, as it is not valid in an NCName.

    Values without that prefix are returned unchanged.
    """
    return val[2:] if val.startswith("_:") else val
|
||||
|
||||
|
||||
class PrettyXMLSerializer(Serializer):
    """Writes the store as nested RDF/XML through an ``XMLWriter``.

    Objects are inlined below their referencing subject up to ``max_depth``
    levels; RDF lists are written with ``rdf:parseType="Collection"``.
    """

    def __init__(self, store: Graph, max_depth=3):
        # NOTE: ``max_depth`` is accepted here but not stored; serialize()
        # reads the effective value from its own keyword arguments.
        super(PrettyXMLSerializer, self).__init__(store)
        self.forceRDFAbout: Set[URIRef] = set()

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the whole store to ``stream``.

        :param stream: binary stream that receives the output.
        :param base: base IRI; falls back to the store's base when ``None``.
        :param encoding: passed through to the ``XMLWriter``.
        :param kwargs: ``max_depth`` (default 3, must be > 0) limits nesting;
            ``xml_base`` overrides the ``xml:base`` attribute value.
        """
        self.__serialized: Dict[Identifier, int] = {}
        store = self.store
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif store.base is not None:
            self.base = store.base
        self.max_depth = kwargs.get("max_depth", 3)
        assert self.max_depth > 0, "max_depth must be greater than 0"

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)
        namespaces = {}

        possible: Set[Node] = set(store.predicates()).union(
            store.objects(None, RDF.type)
        )

        for predicate in possible:
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, local = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            namespaces[prefix] = namespace

        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

        writer.push(RDFVOC.RDF)

        if "xml_base" in kwargs:
            writer.attribute(XMLBASE, kwargs["xml_base"])
        elif self.base:
            writer.attribute(XMLBASE, self.base)

        writer.namespaces(namespaces.items())

        subject: IdentifiedNode
        # Write out subjects that can not be inline
        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if (None, None, subject) in store:
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()

        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)

        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # Must pass the blank node being examined, not the stale
                # ``subject`` variable left over from the loop above.
                self.subject(bnode, 1)

        writer.pop(RDFVOC.RDF)
        stream.write("\n".encode("latin-1"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None  # type: ignore[assignment]

    def subject(self, subject: Identifier, depth: int = 1):
        """Write the element for ``subject`` and, nested inside it, its properties.

        A subject listed in ``forceRDFAbout`` is written as an empty
        ``rdf:Description`` with ``rdf:about``; a subject that has already
        been serialized is skipped.
        """
        store = self.store
        writer = self.writer

        if subject in self.forceRDFAbout:
            writer.push(RDFVOC.Description)
            writer.attribute(RDFVOC.about, self.relativize(subject))
            writer.pop(RDFVOC.Description)
            self.forceRDFAbout.remove(subject)  # type: ignore[arg-type]

        elif subject not in self.__serialized:
            self.__serialized[subject] = 1
            rdf_type = first(store.objects(subject, RDF.type))

            try:
                # type error: Argument 1 to "qname" of "NamespaceManager" has incompatible type "Optional[Node]"; expected "str"
                self.nm.qname(rdf_type)  # type: ignore[arg-type]
            except Exception:
                # No usable qname for the type: fall back to rdf:Description.
                rdf_type = None

            element = rdf_type or RDFVOC.Description
            # type error: Argument 1 to "push" of "XMLWriter" has incompatible type "Node"; expected "str"
            writer.push(element)  # type: ignore[arg-type]

            if isinstance(subject, BNode):
                # The label is written unconditionally. A check that would
                # only include labels referenced more than once, via
                # more_than(store.triples((None, None, subject)), ceil),
                # is disabled.
                writer.attribute(RDFVOC.nodeID, fix(subject))

            else:
                writer.attribute(RDFVOC.about, self.relativize(subject))

            if (subject, None, None) in store:
                for predicate, object in store.predicate_objects(subject):
                    # The rdf:type used as the element name is not repeated.
                    if not (predicate == RDF.type and object == rdf_type):
                        # type error: Argument 1 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                        # type error: Argument 2 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                        self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]

            # type error: Argument 1 to "pop" of "XMLWriter" has incompatible type "Node"; expected "Optional[str]"
            writer.pop(element)  # type: ignore[arg-type]

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """Write the property element for ``predicate`` with value ``object``.

        Depending on the object this emits literal text, a reference
        attribute (``rdf:nodeID`` / ``rdf:resource``), a parseType
        ``Collection`` for list heads, or a nested subject element.
        """
        writer = self.writer
        store = self.store
        writer.push(predicate)

        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)

            if object.datatype == RDF.XMLLiteral and isinstance(
                object.value, xml.dom.minidom.Document
            ):
                writer.attribute(RDFVOC.parseType, "Literal")
                writer.text("")
                writer.stream.write(object)
            else:
                if object.datatype:
                    writer.attribute(RDFVOC.datatype, object.datatype)
                writer.text(object)

        elif (
            object in self.__serialized
            or not (object, None, None) in store  # noqa: E713
        ):
            # Already written, or has no outgoing triples: reference only.
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDFVOC.nodeID, fix(object))
            else:
                writer.attribute(RDFVOC.resource, self.relativize(object))

        else:
            if first(store.objects(object, RDF.first)):  # may not have type
                # RDF.List

                self.__serialized[object] = 1

                # Warn that any assertions on object other than
                # RDF.first and RDF.rest are ignored... including RDF.List
                import warnings

                warnings.warn(
                    "Assertions on %s other than RDF.first " % repr(object)
                    + "and RDF.rest are ignored ... including RDF.List",
                    UserWarning,
                    stacklevel=2,
                )
                writer.attribute(RDFVOC.parseType, "Collection")

                col = Collection(store, object)

                for item in col:
                    if isinstance(item, URIRef):
                        self.forceRDFAbout.add(item)
                    # type error: Argument 1 to "subject" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                    self.subject(item)  # type: ignore[arg-type]

                    if not isinstance(item, URIRef):
                        # type error: Invalid index type "Node" for "Dict[Identifier, int]"; expected type "Identifier"
                        self.__serialized[item] = 1  # type: ignore[index]
            else:
                if first(
                    store.triples_choices(
                        # type error: Argument 1 to "triples_choices" of "Graph" has incompatible type "Tuple[Identifier, URIRef, List[URIRef]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]"
                        (object, RDF.type, [OWL_NS.Class, RDFS.Class])  # type: ignore[arg-type]
                    )
                ) and isinstance(object, URIRef):
                    writer.attribute(RDFVOC.resource, self.relativize(object))

                elif depth <= self.max_depth:
                    self.subject(object, depth + 1)

                elif isinstance(object, BNode):
                    if (
                        object not in self.__serialized
                        and (object, None, None) in store
                        and len(list(store.subjects(object=object))) == 1
                    ):
                        # inline blank nodes if they haven't been serialized yet
                        # and are only referenced once (regardless of depth)
                        self.subject(object, depth + 1)
                    else:
                        writer.attribute(RDFVOC.nodeID, fix(object))

                else:
                    writer.attribute(RDFVOC.resource, self.relativize(object))

        writer.pop(predicate)
|
||||
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Trig RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/trig/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.turtle import TurtleSerializer
|
||||
from rdflib.term import BNode, Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _ContextType, _SubjectType
|
||||
|
||||
__all__ = ["TrigSerializer"]
|
||||
|
||||
|
||||
class TrigSerializer(TurtleSerializer):
    """Turtle-based serializer that writes each context as its own ``{ ... }`` block."""

    short_name = "trig"
    indentString = 4 * " "

    def __init__(self, store: Union[Graph, ConjunctiveGraph]):
        self.default_context: Optional[Node]
        if not store.context_aware:
            # A plain graph is treated as a single context.
            self.contexts = [store]
            self.default_context = None
        else:
            if TYPE_CHECKING:
                assert isinstance(store, ConjunctiveGraph)
            self.contexts = list(store.contexts())
            self.default_context = store.default_context.identifier
            if store.default_context:
                self.contexts.append(store.default_context)

        super().__init__(store)

    def preprocess(self) -> None:
        """Collect ordered subjects and reference counts for every non-empty context."""
        for graph in self.contexts:
            # do not write unnecessary prefix (ex: for an empty default graph)
            if len(graph) == 0:
                continue
            self.store = graph
            # Don't generate a new prefix for a graph URI if one already exists
            self.getQName(graph.identifier, False)
            self._subjects = {}

            for statement in graph:
                self.preprocessTriple(statement)

            for seen_subject in self._subjects:
                self._references[seen_subject] += 1

            self._contexts[graph] = (self.orderSubjects(), self._subjects)

    def reset(self) -> None:
        """Reset inherited state and clear the per-context bookkeeping."""
        super().reset()
        self._contexts: Dict[
            _ContextType,
            Tuple[List[_SubjectType], Dict[_SubjectType, bool]],
        ] = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write all preprocessed contexts to ``stream``.

        :param base: base IRI; falls back to the store's base when ``None``.
        :param spacious: when not ``None``, overrides ``self._spacious``.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()

        self.startDocument()

        for graph, (ordered_subjects, subjects) in self._contexts.items():
            if not ordered_subjects:
                continue

            self._serialized = {}
            self.store = graph
            self._subjects = subjects

            if self.default_context and graph.identifier == self.default_context:
                self.write(self.indent() + "\n{")
            else:
                label: Optional[str]
                if isinstance(graph.identifier, BNode):
                    label = graph.identifier.n3()
                else:
                    # Show the full graph URI if a prefix for it doesn't already exist
                    label = self.getQName(graph.identifier, False)
                    if label is None:
                        label = graph.identifier.n3()
                self.write(self.indent() + "\n%s {" % label)

            self.depth += 1
            for subject in ordered_subjects:
                if self.isDone(subject):
                    continue
                # A newline follows every statement that was actually written.
                if self.statement(subject):
                    self.write("\n")
            self.depth -= 1
            self.write("}\n")

        self.endDocument()
        stream.write(b"\n")
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
__all__ = ["TriXSerializer"]
|
||||
|
||||
# TODO: Move this somewhere central
|
||||
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
|
||||
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
|
||||
class TriXSerializer(Serializer):
    """Writes a context-aware store as a TriX XML document via ``XMLWriter``."""

    def __init__(self, store: Graph):
        super().__init__(store)
        if store.context_aware:
            return
        raise Exception(
            "TriX serialization only makes sense for context-aware stores"
        )

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the ``TriX`` root element with one ``graph`` child per context.

        :param base: base IRI; falls back to the store's base when ``None``.
        :param encoding: passed through to the ``XMLWriter``.
        """
        store = self.store
        self.writer = XMLWriter(
            stream, store.namespace_manager, encoding, extra_ns={"": TRIXNS}
        )
        writer = self.writer

        writer.push(TRIXNS["TriX"])
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is None and store.base is not None:
            base = store.base
        if base is not None:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", base)
        writer.namespaces()

        if isinstance(store, ConjunctiveGraph):
            for subgraph in store.contexts():
                self._writeGraph(subgraph)
        elif isinstance(store, Graph):
            self._writeGraph(store)
        else:
            raise Exception(f"Unknown graph type: {type(self.store)}")

        writer.pop()
        stream.write(b"\n")

    def _writeGraph(self, graph):  # noqa: N802
        """Write one ``graph`` element: optional base, optional uri, then triples."""
        writer = self.writer
        writer.push(TRIXNS["graph"])
        if graph.base:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", graph.base)
        if isinstance(graph.identifier, URIRef):
            writer.element(TRIXNS["uri"], content=str(graph.identifier))

        for statement in graph.triples((None, None, None)):
            self._writeTriple(statement)
        writer.pop()

    def _writeTriple(self, triple):  # noqa: N802
        """Write one ``triple`` element with a child per URIRef, BNode or Literal term."""
        writer = self.writer
        writer.push(TRIXNS["triple"])
        for term in triple:
            text = str(term)
            if isinstance(term, URIRef):
                writer.element(TRIXNS["uri"], content=text)
                continue
            if isinstance(term, BNode):
                writer.element(TRIXNS["id"], content=text)
                continue
            if not isinstance(term, Literal):
                # Terms of any other kind are not written.
                continue
            # A datatype takes precedence over a language tag.
            if term.datatype:
                writer.element(
                    TRIXNS["typedLiteral"],
                    content=text,
                    attributes={TRIXNS["datatype"]: str(term.datatype)},
                )
            elif term.language:
                writer.element(
                    TRIXNS["plainLiteral"],
                    content=text,
                    attributes={XMLNS["lang"]: str(term.language)},
                )
            else:
                writer.element(TRIXNS["plainLiteral"], content=text)
        writer.pop()
|
||||
@@ -0,0 +1,453 @@
|
||||
"""
|
||||
Turtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
DefaultDict,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _PredicateType, _SubjectType, _TripleType
|
||||
|
||||
__all__ = ["RecursiveSerializer", "TurtleSerializer"]
|
||||
|
||||
|
||||
class RecursiveSerializer(Serializer):
    """Base class for serializers that walk a graph subject by subject.

    It keeps the bookkeeping shared by such serializers: which subjects
    exist, which have already been written, how often each node is used as an
    object, the namespace bindings collected so far and the current nesting
    depth.
    """

    topClasses = [RDFS.Class]
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    indentString = " "
    roundtrip_prefixes: Tuple[Any, ...] = ()

    def __init__(self, store: Graph):
        super().__init__(store)
        self.stream: Optional[IO[bytes]] = None
        self.reset()

    def addNamespace(self, prefix: str, uri: URIRef) -> None:
        """Record ``prefix`` as bound to ``uri``.

        Raises:
            Exception: if ``prefix`` is already bound to a different URI.
        """
        if prefix in self.namespaces:
            current = self.namespaces[prefix]
            if current != uri:
                raise Exception(
                    "Trying to override namespace prefix %s => %s, but it's already bound to %s"
                    % (prefix, uri, current)
                )
        self.namespaces[prefix] = uri

    def checkSubject(self, subject: _SubjectType) -> bool:
        """Tell whether ``subject`` should be serialized at this point."""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are only written at the outer levels.
        if subject in self._topLevels and self.depth > 1:
            return False
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject: _SubjectType) -> bool:
        """Tell whether ``subject`` has already been serialized."""
        return subject in self._serialized

    def orderSubjects(self) -> List[_SubjectType]:
        """Return all subjects in the order in which they should be written.

        Sorted instances of each class in ``topClasses`` come first (and are
        recorded as top-level subjects). The remaining subjects follow,
        sorted by (is blank node, reference count, subject).
        """
        handled: Dict[_SubjectType, bool] = {}
        ordered: List[_SubjectType] = []

        for top_class in self.topClasses:
            instances = sorted(self.store.subjects(RDF.type, top_class))
            ordered.extend(instances)
            for instance in instances:
                self._topLevels[instance] = True
                handled[instance] = True

        remaining = sorted(
            (isinstance(candidate, BNode), self._references[candidate], candidate)
            for candidate in self._subjects
            if candidate not in handled
        )
        ordered.extend(entry[2] for entry in remaining)
        return ordered

    def preprocess(self) -> None:
        """Run ``preprocessTriple`` over every triple in the store."""
        for statement in self.store.triples((None, None, None)):
            self.preprocessTriple(statement)

    def preprocessTriple(self, spo: _TripleType) -> None:
        """Count the object as referenced and register the subject."""
        subject, _predicate, obj = spo
        self._references[obj] += 1
        self._subjects[subject] = True

    def reset(self) -> None:
        """Clear all per-serialization state and re-add round-trip prefixes."""
        self.depth = 0
        # Typed none because nothing is using it ...
        self.lists: Dict[None, None] = {}
        self.namespaces: Dict[str, URIRef] = {}
        self._references: DefaultDict[Node, int] = defaultdict(int)
        self._serialized: Dict[_SubjectType, bool] = {}
        self._subjects: Dict[_SubjectType, bool] = {}
        self._topLevels: Dict[_SubjectType, bool] = {}

        wanted = self.roundtrip_prefixes
        if not wanted:
            return
        # An iterable selects specific prefixes; any other truthy value
        # selects every prefix bound in the store.
        selective = hasattr(wanted, "__iter__")
        for prefix, ns in self.store.namespaces():
            if not selective or prefix in wanted:
                self.addNamespace(prefix, ns)

    def buildPredicateHash(
        self, subject: _SubjectType
    ) -> Mapping[_PredicateType, List[Node]]:
        """Map each predicate of ``subject`` to the list of its objects."""
        by_predicate: Dict[_PredicateType, List[Node]] = {}
        for _s, predicate, obj in self.store.triples((subject, None, None)):
            by_predicate.setdefault(predicate, []).append(obj)
        return by_predicate

    def sortProperties(
        self, properties: Mapping[_PredicateType, List[Node]]
    ) -> List[_PredicateType]:
        """Sort the object lists in place and return the predicates in order.

        Predicates listed in ``predicateOrder`` come first, in that order;
        all other predicates follow in sorted order.
        """
        for values in properties.values():
            values.sort()

        ordered: List[_PredicateType] = []
        placed: Dict[_PredicateType, bool] = {}
        for preferred in self.predicateOrder:
            if preferred in properties and preferred not in placed:
                ordered.append(preferred)
                placed[preferred] = True
        for predicate in sorted(properties.keys()):
            if predicate not in placed:
                ordered.append(predicate)
                placed[predicate] = True
        return ordered

    def subjectDone(self, subject: _SubjectType) -> None:
        """Record ``subject`` as serialized."""
        self._serialized[subject] = True

    def indent(self, modifier: int = 0) -> str:
        """Return ``indentString`` repeated ``depth + modifier`` times."""
        return self.indentString * (self.depth + modifier)

    def write(self, text: str) -> None:
        """Encode ``text`` with ``self.encoding`` and write it to the stream.

        Characters that cannot be encoded are replaced.
        """
        encoded = text.encode(self.encoding, "replace")
        # type error: Item "None" of "Optional[IO[bytes]]" has no attribute "write"
        self.stream.write(encoded)  # type: ignore[union-attr]
|
||||
|
||||
|
||||
# Position of a node inside a triple; passed as the ``position`` argument of
# TurtleSerializer.path(), label() and related methods.
SUBJECT = 0
VERB = 1
OBJECT = 2

# Whether getQName() may generate a new prefix when abbreviating the datatype
# IRI of a literal.
_GEN_QNAME_FOR_DT = False
# Default for TurtleSerializer._spacious, which adds blank lines after the
# prefix declarations and at the end of the document.
_SPACIOUS_OUTPUT = False
|
||||
|
||||
|
||||
class TurtleSerializer(RecursiveSerializer):
    """Serialize a graph as Turtle.

    Subjects are written one statement at a time. Blank nodes that are
    referenced at most once are nested inline as ``[ ... ]`` and well-formed
    RDF lists are written as ``( ... )``.
    """

    short_name = "turtle"
    indentString = " "

    def __init__(self, store: Graph):
        # Must exist before the base constructor runs: it calls reset(),
        # which may call addNamespace(), which reads _ns_rewrite.
        self._ns_rewrite: Dict[str, str] = {}
        super().__init__(store)
        # Predicates written as a keyword instead of an IRI / prefixed name.
        self.keywords: Dict[Node, str] = {RDF.type: "a"}
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    # type error: Return type "str" of "addNamespace" incompatible with return type "None" in supertype "RecursiveSerializer"
    def addNamespace(self, prefix: str, namespace: URIRef) -> str:  # type: ignore[override]
        """Register ``namespace`` and return the prefix actually used for it.

        Turtle does not support prefixes that start with ``_``; such a
        prefix, or a prefix already bound to a different namespace, is
        rewritten by prepending ``p`` until the result is unused. Namespaces
        are registered as triples are seen, so a rewritten prefix may later
        clash with a "real" one; the rewrites made so far are therefore
        remembered in ``_ns_rewrite``.
        """
        starts_with_underscore = prefix > "" and prefix[0] == "_"
        if (
            starts_with_underscore
            or self.namespaces.get(prefix, namespace) != namespace
        ):
            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

            prefix = self._ns_rewrite.get(prefix, prefix)

        super().addNamespace(prefix, namespace)
        return prefix

    def reset(self) -> None:
        """Clear all per-serialization state, including prefix rewrites."""
        super().reset()
        # typing as Dict[None, None] because nothing seems to be using it
        self._shortNames: Dict[None, None] = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write the store to ``stream`` as Turtle.

        Args:
            stream: Binary stream receiving the document.
            base: Base IRI; when omitted, the base of the store is used if
                it has one.
            encoding: Accepted for interface compatibility; not read here.
            spacious: When given, overrides the spacious-output setting.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            if self.isDone(subject):
                continue
            # A newline follows every statement that was written. (The
            # original "first time" flag was always cleared before it was
            # tested, so it never suppressed anything.)
            if self.statement(subject):
                self.write("\n")

        self.endDocument()
        stream.write(b"\n")

        self.base = None

    def preprocessTriple(self, triple: _TripleType) -> None:
        """Collect reference counts and namespace bindings for ``triple``."""
        super().preprocessTriple(triple)
        for i, node in enumerate(triple):
            if i == VERB and node in self.keywords:
                # predicate is a keyword
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    # TODO: Rename to get_pname
    def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]:
        """Return ``prefix:local`` for ``uri``, or None if it has none.

        None is returned when ``uri`` is not a ``URIRef``, when no prefix can
        be found for it, or when the local part would end with ``.``. As a
        side effect the namespace is registered through ``addNamespace``.
        """
        if not isinstance(uri, URIRef):
            return None

        parts = None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is not None:
                parts = (pfx, uri, "")
            else:
                # nothing worked
                return None

        prefix, namespace, local = parts

        local = local.replace(r"(", r"\(").replace(r")", r"\)")

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return "%s:%s" % (prefix, local)

    def startDocument(self) -> None:
        """Write the ``@base`` and ``@prefix`` declarations."""
        self._started = True
        ns_list = sorted(self.namespaces.items())

        if self.base:
            self.write(self.indent() + "@base <%s> .\n" % self.base)
        for prefix, uri in ns_list:
            self.write(self.indent() + "@prefix %s: <%s> .\n" % (prefix, uri))
        if ns_list and self._spacious:
            self.write("\n")

    def endDocument(self) -> None:
        """Write the trailing blank line used by spacious output."""
        if self._spacious:
            self.write("\n")

    def statement(self, subject: _SubjectType) -> bool:
        """Write the statement for ``subject`` and mark it as done."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject: _SubjectType) -> bool:
        """Write ``subject`` followed by its predicate list and `` .``."""
        self.write("\n" + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def s_squared(self, subject: _SubjectType) -> bool:
        """Write an unreferenced blank-node subject as ``[] ... .``.

        Returns False, writing nothing, when ``subject`` is not a blank node
        or is referenced as an object somewhere.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write("\n" + self.indent() + "[]")
        self.predicateList(subject)
        self.write(" .")
        return True

    def path(self, node: Node, position: int, newline: bool = False) -> None:
        """Write ``node``, nested inline where possible, as a label otherwise.

        Raises:
            Error: if neither form could be written.
        """
        if not (
            self.p_squared(node, position, newline)
            or self.p_default(node, position, newline)
        ):
            raise Error("Cannot serialize node '%s'" % (node,))

    def p_default(self, node: Node, position: int, newline: bool = False) -> bool:
        """Write the label of ``node``, preceded by a space where needed."""
        if position != SUBJECT and not newline:
            self.write(" ")
        self.write(self.label(node, position))
        return True

    def label(self, node: Node, position: int) -> str:
        """Return the Turtle text for ``node`` at ``position``."""
        if node == RDF.nil:
            return "()"
        # Compare by value: ``is`` on ints only happens to work because
        # CPython caches small integers.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
            )
        node = self.relativize(node)  # type: ignore[type-var]
        return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node: Node, position: int, newline: bool = False) -> bool:
        """Write a blank node inline as ``( ... )`` or ``[ ... ]``.

        Returns False, writing nothing, when ``node`` is not a blank node, is
        already serialized, is referenced more than once, or is in subject
        position.
        """
        if (
            not isinstance(node, BNode)
            or node in self._serialized
            or self._references[node] > 1
            or position == SUBJECT
        ):
            return False

        if not newline:
            self.write(" ")

        if self.isValidList(node):
            # this is a list
            self.write("(")
            self.depth += 1  # 2
            self.doList(node)
            self.depth -= 1  # 2
            self.write(" )")
        else:
            self.subjectDone(node)
            # The depth is raised by two, then lowered by one before the
            # nested predicate list and by one more after the closing
            # bracket, so the nested properties are written one level deeper.
            self.depth += 2
            self.write("[")
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(" ]")
            self.depth -= 1

        return True

    def isValidList(self, l_: Node) -> bool:
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if self.store.value(l_, RDF.first) is None:
                return False
        except Exception:
            return False
        while l_:
            if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
                return False
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]
        return True

    def doList(self, l_: Node) -> None:
        """Write the items of the RDF list starting at ``l_``.

        Every list node visited is marked as serialized.
        """
        while l_:
            item = self.store.value(l_, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l_)
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]

    def predicateList(self, subject: Node, newline: bool = False) -> None:
        """Write every predicate of ``subject`` with its objects.

        Predicates after the first are separated by `` ;`` and a newline.
        """
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if len(propList) == 0:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(" ;\n" + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node: Node, newline: bool = False) -> None:
        """Write ``node`` in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects: Sequence[Node]) -> None:
        """Write ``objects``; objects after the first go on their own line."""
        count = len(objects)
        if count == 0:
            return
        # NOTE: the original ``(count == 1) and 0 or 1`` always evaluated to
        # 1, because ``True and 0`` is falsy. The value is kept so that the
        # serialized output does not change.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(",\n" + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
|
||||
+128
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
from typing import IO, TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.term import URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.namespace import Namespace, NamespaceManager
|
||||
|
||||
|
||||
__all__ = ["XMLWriter"]
|
||||
|
||||
# Extra replacements applied when escaping character data. Conforming XML
# parsers normalise a literal carriage return to a line feed, so it has to be
# written as a character reference to survive a round trip; mapping it to a
# plain space would silently alter the text.
ESCAPE_ENTITIES = {"\r": "&#13;"}
|
||||
|
||||
|
||||
class XMLWriter:
    """Small streaming XML writer.

    Elements are opened with ``push`` and closed with ``pop``. Element and
    attribute names are given as full URIs and abbreviated with ``qname``,
    first against ``extra_ns`` and then against the namespace manager.
    """

    def __init__(
        self,
        stream: IO[bytes],
        namespace_manager: NamespaceManager,
        encoding: Optional[str] = None,
        decl: int = 1,
        extra_ns: Optional[Dict[str, Namespace]] = None,
    ):
        """Wrap ``stream`` in an encoding writer and emit the XML declaration.

        Args:
            stream: Binary stream receiving the encoded output.
            namespace_manager: Supplies ``namespaces()`` and
                ``qname_strict()``.
            encoding: Output encoding; defaults to ``"utf-8"``.
            decl: When truthy, the XML declaration is written immediately.
            extra_ns: Additional prefix-to-namespace bindings that take
                precedence over the namespace manager; the empty prefix
                denotes the default namespace.
        """
        encoding = encoding or "utf-8"
        stream_writer = codecs.getwriter(encoding)
        # NOTE on type ignores: this is mainly because the variable is being re-used.
        # type error: Incompatible types in assignment (expression has type "StreamWriter", variable has type "IO[bytes]")
        self.stream = stream = stream_writer(stream)  # type: ignore[assignment]
        if decl:
            # type error: No overload variant of "write" of "IO" matches argument type "str"
            stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)  # type: ignore[call-overload]
        self.element_stack: List[str] = []
        self.nm = namespace_manager
        self.extra_ns = extra_ns or {}
        # True once the ">" of the most recent start tag has been written.
        self.closed = True
        # True once a child element has been closed since the last push().
        # Initialised here so the attribute always exists.
        self.parent = False

    def __get_indent(self) -> str:
        return " " * len(self.element_stack)

    indent = property(__get_indent)

    def __close_start_tag(self) -> None:
        if not self.closed:  # TODO:
            self.closed = True
            self.stream.write(">")

    def push(self, uri: str) -> None:
        """Open a new element named by ``uri`` on its own indented line."""
        self.__close_start_tag()
        write = self.stream.write
        write("\n")
        write(self.indent)
        write("<%s" % self.qname(uri))
        self.element_stack.append(uri)
        # The start tag stays open so that attributes can still be added.
        self.closed = False
        self.parent = False

    def pop(self, uri: Optional[str] = None) -> None:
        """Close the innermost open element.

        Args:
            uri: When given, asserted to be the URI of the element being
                closed.
        """
        top = self.element_stack.pop()
        if uri:
            assert uri == top
        write = self.stream.write
        if not self.closed:
            # Nothing was written inside the element: self-close it.
            self.closed = True
            write("/>")
        else:
            if self.parent:
                # The element had child elements, so the end tag goes on a
                # line of its own.
                write("\n")
                write(self.indent)
            write("</%s>" % self.qname(top))
        self.parent = True

    def element(
        self,
        uri: str,
        content: str,
        attributes: Optional[Dict[URIRef, str]] = None,
    ) -> None:
        """Utility method for adding a complete simple element"""
        self.push(uri)
        # ``None`` replaces the former mutable ``{}`` default.
        for k, v in (attributes or {}).items():
            self.attribute(k, v)
        self.text(content)
        self.pop()

    def namespaces(
        self, namespaces: Optional[Iterable[Tuple[str, str]]] = None
    ) -> None:
        """Write ``xmlns`` declarations into the currently open start tag.

        Args:
            namespaces: ``(prefix, namespace)`` pairs; when falsy, the
                bindings of the namespace manager are used.
        """
        if not namespaces:
            namespaces = self.nm.namespaces()

        write = self.stream.write
        write("\n")
        for prefix, namespace in namespaces:
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            # Allow user-provided namespace bindings to prevail
            elif prefix not in self.extra_ns:
                write(' xmlns="%s"\n' % namespace)

        for prefix, namespace in self.extra_ns.items():
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write(' xmlns="%s"\n' % namespace)

    def attribute(self, uri: str, value: str) -> None:
        """Write one attribute, quoted and escaped, into the open start tag."""
        write = self.stream.write
        write(" %s=%s" % (self.qname(uri), quoteattr(value)))

    def text(self, text: str) -> None:
        """Write character data, closing the pending start tag first.

        Text containing both ``<`` and ``>`` but no ``]]>`` is written as a
        CDATA section; all other text is escaped.
        """
        self.__close_start_tag()
        if "<" in text and ">" in text and "]]>" not in text:
            self.stream.write("<![CDATA[")
            self.stream.write(text)
            self.stream.write("]]>")
        else:
            self.stream.write(escape(text, ESCAPE_ENTITIES))

    def qname(self, uri: str) -> str:
        """Compute qname for a uri using our extra namespaces,
        or the given namespace manager"""

        for pre, ns in self.extra_ns.items():
            if uri.startswith(ns):
                if pre != "":
                    return ":".join([pre, uri[len(ns) :]])
                else:
                    return uri[len(ns) :]

        return self.nm.qname_strict(uri)
|
||||
+676
@@ -0,0 +1,676 @@
|
||||
"""
|
||||
Implementation of the JSON-LD Context structure. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
"""
|
||||
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import namedtuple
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Collection,
|
||||
Dict,
|
||||
Generator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from urllib.parse import urljoin, urlsplit
|
||||
|
||||
from rdflib.namespace import RDF
|
||||
|
||||
from .errors import (
|
||||
INVALID_CONTEXT_ENTRY,
|
||||
INVALID_REMOTE_CONTEXT,
|
||||
RECURSIVE_CONTEXT_INCLUSION,
|
||||
)
|
||||
from .keys import (
|
||||
BASE,
|
||||
CONTAINER,
|
||||
CONTEXT,
|
||||
GRAPH,
|
||||
ID,
|
||||
IMPORT,
|
||||
INCLUDED,
|
||||
INDEX,
|
||||
JSON,
|
||||
LANG,
|
||||
LIST,
|
||||
NEST,
|
||||
NONE,
|
||||
PREFIX,
|
||||
PROPAGATE,
|
||||
PROTECTED,
|
||||
REV,
|
||||
SET,
|
||||
TYPE,
|
||||
VALUE,
|
||||
VERSION,
|
||||
VOCAB,
|
||||
)
|
||||
from .util import norm_url, source_to_json, split_iri
|
||||
|
||||
# Keyword constants (imported from .keys) gathered into a set.
# NOTE(review): presumably the keywords a context may define aliases for,
# since Context._alias is described as mapping a NODE_KEY to its aliases;
# confirm against the code that consumes NODE_KEYS.
NODE_KEYS = {GRAPH, ID, INCLUDED, JSON, LIST, NEST, NONE, REV, SET, TYPE, VALUE, LANG}
|
||||
|
||||
|
||||
class Defined(int):
    """``int`` subclass without added behaviour, used as the type of ``UNDEF``."""

    pass


# Falsy sentinel for "no value given". It is the default of several add_term()
# and find_term() parameters and is tested for with ``is`` / ``is not``.
UNDEF = Defined(0)
|
||||
|
||||
# From <https://tools.ietf.org/html/rfc3986#section-2.2>
# Context.add_term() treats a term whose IRI ends with one of these
# characters as a prefix when no explicit ``prefix`` flag applies.
URI_GEN_DELIMS = (":", "/", "?", "#", "[", "]", "@")
|
||||
|
||||
# Shapes accepted as a context source by Context.__init__() and
# Context.load(): a dict, a string, None, or a list mixing those.
_ContextSourceType = Union[
    List[Union[Dict[str, Any], str, None]], Dict[str, Any], str, None
]
|
||||
|
||||
|
||||
class Context:
|
||||
def __init__(
    self,
    source: _ContextSourceType = None,
    base: Optional[str] = None,
    version: Optional[float] = 1.1,
):
    """Create a context and, when ``source`` is truthy, load it.

    Args:
        source: Context definition(s) passed on to ``load``.
        base: Base IRI; also stored unchanged as ``doc_base``.
        version: Processing version; a falsy value is replaced by ``1.1``.
    """
    self.version: float = version or 1.1
    self.language = None
    self.vocab: Optional[str] = None
    self._base: Optional[str]
    # Goes through the ``base`` property setter. ``_base`` does not exist yet
    # at this point, so the setter stores the value (minus any fragment)
    # without resolving it.
    self.base = base
    self.doc_base = base
    self.terms: Dict[str, Any] = {}
    # _alias maps NODE_KEY to list of aliases
    self._alias: Dict[str, List[str]] = {}
    # Keyed by (idref, coercion or language, container key, reverse); see
    # add_term() and find_term().
    self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Term] = {}
    # Maps the IRI of a prefix term to the term name; see add_term().
    self._prefixes: Dict[str, Any] = {}
    self.active = False
    self.parent: Optional[Context] = None
    self.propagate = True
    # Handed on by reference to sub-contexts in _subcontext().
    self._context_cache: Dict[str, Any] = {}
    if source:
        self.load(source)
|
||||
|
||||
@property
def base(self) -> Optional[str]:
    """Base IRI of this context, without fragment."""
    return self._base


@base.setter
def base(self, base: Optional[str]):
    """Set the base IRI and derive ``_basedomain`` from it.

    Any fragment is dropped first. Once ``_base`` exists, a non-None value
    is resolved with ``resolve_iri``; the very first assignment stores the
    value as given.
    """
    if base:
        # Keep only what precedes the first "#".
        base = base.partition("#")[0]
    if base is not None and hasattr(self, "_base"):
        self._base = self.resolve_iri(base)
    else:
        self._base = base
    # "scheme://netloc" of the base, or None when there is no base.
    self._basedomain = "%s://%s" % urlsplit(base)[0:2] if base else None
|
||||
|
||||
def subcontext(self, source: Any, propagate: bool = True) -> Context:
    """Return a new context derived from this one (or its parent) plus ``source``.

    A context that does not propagate derives the new context from its
    parent instead of from itself.
    """
    # IMPROVE: to optimize, implement SubContext with parent fallback support
    if self.propagate is False:
        origin = self.parent
    else:
        origin = self
    # type error: Item "None" of "Optional[Context]" has no attribute "_subcontext"
    return origin._subcontext(source, propagate)  # type: ignore[union-attr]
|
||||
|
||||
def _subcontext(self, source: Any, propagate: bool) -> Context:
    """Build a child context that starts from this state and loads ``source``.

    Terms, lookup table, prefixes and alias lists are copied; the context
    cache is shared with the child.
    """
    child = Context(version=self.version)
    child.propagate = propagate
    child.parent = self
    child.language = self.language
    child.vocab = self.vocab
    child.base = self.base
    child.doc_base = self.doc_base
    # Copy each alias list so that the child cannot mutate ours.
    child._alias = {key: list(aliases) for key, aliases in self._alias.items()}
    child.terms = self.terms.copy()
    child._lookup = self._lookup.copy()
    child._prefixes = self._prefixes.copy()
    child._context_cache = self._context_cache
    child.load(source)
    return child
|
||||
|
||||
def _clear(self) -> None:
|
||||
self.language = None
|
||||
self.vocab = None
|
||||
self.terms = {}
|
||||
self._alias = {}
|
||||
self._lookup = {}
|
||||
self._prefixes = {}
|
||||
self.active = False
|
||||
self.propagate = True
|
||||
|
||||
def get_context_for_term(self, term: Optional[Term]) -> Context:
    """Return the context to use below ``term``.

    A term that carries its own context yields a propagating sub-context;
    otherwise this context itself is returned.
    """
    if not term or term.context is UNDEF:
        return self
    return self._subcontext(term.context, propagate=True)
|
||||
|
||||
def get_context_for_type(self, node: Any) -> Optional[Context]:
    """Return the context that applies to ``node`` given its type.

    For version 1.1 and later, the first declared type of ``node`` that is
    a known term is looked up; if that term carries a context, a
    non-propagating sub-context is returned. In every other case the result
    is this context, or its parent when this context does not propagate.
    """
    if self.version >= 1.1:
        declared = self.get_type(node) if isinstance(node, dict) else None
        if isinstance(declared, list):
            candidates = declared
        elif declared:
            candidates = [declared]
        else:
            candidates = []

        typeterm = None
        for candidate in candidates:
            try:
                typeterm = self.terms.get(candidate)
            except TypeError:
                # extra lenience, triggers if type is set to a literal
                pass
            if typeterm is not None:
                break

        if typeterm and typeterm.context:
            scoped = self.subcontext(typeterm.context, propagate=False)
            if scoped:
                return scoped

    if self.propagate is False:
        return self.parent
    return self
|
||||
|
||||
def get_id(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``ID``, honouring aliases (see ``_get``)."""
    return self._get(obj, ID)
|
||||
|
||||
def get_type(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``TYPE``, honouring aliases (see ``_get``)."""
    return self._get(obj, TYPE)
|
||||
|
||||
def get_language(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``LANG``, honouring aliases (see ``_get``)."""
    return self._get(obj, LANG)
|
||||
|
||||
def get_value(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``VALUE``, honouring aliases (see ``_get``)."""
    return self._get(obj, VALUE)
|
||||
|
||||
def get_graph(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``GRAPH``, honouring aliases (see ``_get``)."""
    return self._get(obj, GRAPH)
|
||||
|
||||
def get_list(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``LIST``, honouring aliases (see ``_get``)."""
    return self._get(obj, LIST)
|
||||
|
||||
def get_set(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``SET``, honouring aliases (see ``_get``)."""
    return self._get(obj, SET)
|
||||
|
||||
def get_rev(self, obj: Dict[str, Any]) -> Any:
    """Return the value ``obj`` holds for ``REV``, honouring aliases (see ``_get``)."""
    return self._get(obj, REV)
|
||||
|
||||
def _get(self, obj: Dict[str, Any], key: str) -> Any:
|
||||
for alias in self._alias.get(key, []):
|
||||
if alias in obj:
|
||||
return obj.get(alias)
|
||||
return obj.get(key)
|
||||
|
||||
def get_key(self, key: str) -> str:
    """Return the first name yielded by ``get_keys(key)``.

    That is the first alias registered for ``key``, or ``key`` itself when
    it has no aliases. ``None`` is returned if nothing is yielded at all.
    """
    # type error: the fallback makes the value Optional[str]
    return next(iter(self.get_keys(key)), None)  # type: ignore[return-value]
|
||||
|
||||
def get_keys(self, key: str) -> Generator[str, None, None]:
    """Yield every alias registered for ``key``, then ``key`` itself."""
    yield from self._alias.get(key, ())
    yield key
|
||||
|
||||
# Read-only properties giving the name to use for each keyword: the first
# alias registered for it, or the keyword itself when it has no alias (see
# get_key()).
lang_key = property(lambda self: self.get_key(LANG))
id_key = property(lambda self: self.get_key(ID))
type_key = property(lambda self: self.get_key(TYPE))
value_key = property(lambda self: self.get_key(VALUE))
list_key = property(lambda self: self.get_key(LIST))
rev_key = property(lambda self: self.get_key(REV))
graph_key = property(lambda self: self.get_key(GRAPH))
|
||||
|
||||
def add_term(
    self,
    name: str,
    idref: str,
    coercion: Union[Defined, str] = UNDEF,
    container: Union[Collection[Any], str, Defined] = UNDEF,
    index: Optional[Union[str, Defined]] = None,
    language: Optional[Union[str, Defined]] = UNDEF,
    reverse: bool = False,
    context: Any = UNDEF,
    prefix: Optional[bool] = None,
    protected: bool = False,
):
    """Define the term ``name`` and index it for reverse lookup.

    Nothing is done when ``name`` is not accepted by ``_accept_term`` or,
    for version 1.1 and later, when an existing term of that name is
    protected.
    """
    # Without an explicit flag (or before 1.1) a term counts as a prefix
    # when its IRI ends with a URI "gen-delim" character.
    if self.version < 1.1 or prefix is None:
        prefix = isinstance(idref, str) and idref.endswith(URI_GEN_DELIMS)

    if not self._accept_term(name):
        return

    if self.version >= 1.1:
        previous = self.terms.get(name)
        if previous and previous.protected:
            return

    # Normalise ``container`` to a set.
    if isinstance(container, (list, set, tuple)):
        containers = set(container)
    elif container is UNDEF:
        containers = set()
    else:
        containers = {container}

    term = Term(
        idref,
        name,
        coercion,
        containers,
        index,
        language,
        reverse,
        context,
        prefix,
        protected,
    )
    self.terms[name] = term

    # The first of LIST, LANG, SET present in the containers becomes part of
    # the lookup key; UNDEF when none is present.  (INDEX, ID, GRAPH are not
    # considered.)
    container_key: Union[Defined, str] = next(
        (candidate for candidate in (LIST, LANG, SET) if candidate in containers),
        UNDEF,
    )
    self._lookup[(idref, coercion or language, container_key, reverse)] = term

    if term.prefix is True:
        self._prefixes[idref] = name
|
||||
|
||||
def find_term(
    self,
    idref: str,
    coercion: Optional[Union[str, Defined]] = None,
    container: Union[Defined, str] = UNDEF,
    language: Optional[str] = None,
    reverse: bool = False,
):
    """Find the term defined for ``idref`` that best matches the arguments.

    Lookup keys are tried from most to least specific; the first truthy hit
    is returned. The final fallback is the term registered without coercion
    and container, which may be ``None``.
    """
    table = self._lookup

    if coercion is None:
        coercion = language

    attempts = []
    if coercion is not UNDEF:
        if container:
            attempts.append((idref, coercion, container, reverse))
        attempts.append((idref, coercion, UNDEF, reverse))

    if container:
        attempts.append((idref, coercion, container, reverse))
    elif language:
        attempts.append((idref, UNDEF, LANG, reverse))
    else:
        attempts.append((idref, coercion or UNDEF, SET, reverse))

    for lookup_key in attempts:
        match = table.get(lookup_key)
        if match:
            return match

    return table.get((idref, UNDEF, UNDEF, reverse))
|
||||
|
||||
def resolve(self, curie_or_iri: str) -> str:
    """Expand ``curie_or_iri`` without vocabulary and resolve it.

    Blank node identifiers are returned as expanded; an expansion that
    contains a space yields the empty string.
    """
    expanded = self.expand(curie_or_iri, False)
    # type error: Argument 1 to "isblank" of "Context" has incompatible type "Optional[str]"; expected "str"
    if self.isblank(expanded):  # type: ignore[arg-type]
        # type error: Incompatible return value type (got "Optional[str]", expected "str")
        return expanded  # type: ignore[return-value]
    # type error: Unsupported right operand type for in ("Optional[str]")
    has_space = " " in expanded  # type: ignore[operator]
    # type error: Argument 1 to "resolve_iri" of "Context" has incompatible type "Optional[str]"; expected "str"
    return "" if has_space else self.resolve_iri(expanded)  # type: ignore[arg-type]
|
||||
|
||||
def resolve_iri(self, iri: str) -> str:
    """Return the result of ``norm_url`` applied to the current base and ``iri``."""
    # type error: Argument 1 to "norm_url" has incompatible type "Optional[str]"; expected "str"
    return norm_url(self._base, iri)  # type: ignore[arg-type]
|
||||
|
||||
def isblank(self, ref: str) -> bool:
    """Tell whether ``ref`` starts with the blank node marker ``_:``."""
    return ref.startswith("_:")
|
||||
|
||||
def expand(self, term_curie_or_iri: Any, use_vocab: bool = True) -> Optional[str]:
    """Expand a term, compact IRI or IRI to a full IRI.

    Non-string input is returned unchanged and a string rejected by
    ``_accept_term`` yields ``""``. With ``use_vocab`` a defined term
    expands to its id, and an undefined plain term expands against the
    vocabulary, or to ``None`` when there is no vocabulary. A string whose
    prefix is ``_`` is returned as is; a known prefix is replaced by its
    IRI. Everything else is resolved with ``resolve_iri``.
    """
    value = term_curie_or_iri
    if not isinstance(value, str):
        return value

    if not self._accept_term(value):
        return ""

    if use_vocab:
        defined = self.terms.get(value)
        if defined:
            return defined.id

    is_term, pfx, local = self._prep_expand(value)
    if pfx == "_":
        return value

    if pfx is None:
        if is_term and use_vocab:
            return self.vocab + value if self.vocab else None
    else:
        ns = self.terms.get(pfx)
        if ns and ns.prefix and ns.id:
            return ns.id + local

    return self.resolve_iri(value)
|
||||
|
||||
def shrink_iri(self, iri: str) -> str:
    """Shorten ``iri`` to ``prefix:name`` or to a base-relative reference.

    A registered prefix for the namespace part takes priority. Otherwise,
    when a base is set, the base itself shrinks to ``""`` and IRIs starting
    with ``_basedomain`` lose that leading part. Anything else is returned
    unchanged.
    """
    namespace, local_name = split_iri(str(iri))
    prefix = self._prefixes.get(namespace)
    if prefix:
        # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]"
        return ":".join((prefix, local_name))  # type: ignore[arg-type]
    if self._base:
        if str(iri) == self._base:
            return ""
        # type error: Argument 1 to "startswith" of "str" has incompatible type "Optional[str]"; expected "Union[str, Tuple[str, ...]]"
        if iri.startswith(self._basedomain):  # type: ignore[arg-type]
            # type error: Argument 1 to "len" has incompatible type "Optional[str]"; expected "Sized"
            return iri[len(self._basedomain) :]  # type: ignore[arg-type]
    return iri
|
||||
|
||||
def to_symbol(self, iri: str) -> Optional[str]:
    """Return the shortest symbol for ``iri`` known to this context.

    Tried in order: a term found by ``find_term``, the local name when the
    namespace equals the active vocabulary, ``prefix:name`` for a registered
    prefix, and finally the IRI string itself.
    """
    iri = str(iri)
    matching_term = self.find_term(iri)
    if matching_term:
        return matching_term.name

    namespace, local_name = split_iri(iri)
    if namespace == self.vocab:
        return local_name

    prefix = self._prefixes.get(namespace)
    if not prefix:
        return iri
    # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]"
    return ":".join((prefix, local_name))  # type: ignore[arg-type]
|
||||
|
||||
def load(
    self,
    source: _ContextSourceType,
    base: Optional[str] = None,
    referenced_contexts: Set[Any] = None,
):
    """Activate this context and read one or more context sources into it.

    ``source`` may be a single source or a list of them. The sources are
    flattened by ``_prep_sources`` and then processed in order: a ``None``
    entry clears the context via ``_clear``, any other entry is handed to
    ``_read_source``.
    """
    self.active = True
    flattened: List[Tuple[Optional[str], Union[Dict[str, Any], str, None]]] = []
    inputs = source if isinstance(source, list) else [source]
    referenced_contexts = referenced_contexts or set()
    self._prep_sources(base, inputs, flattened, referenced_contexts)
    for source_url, ctx_source in flattened:
        if ctx_source is not None:
            # type error: Argument 1 to "_read_source" of "Context" has incompatible type "Union[Dict[str, Any], str]"; expected "Dict[str, Any]"
            self._read_source(ctx_source, source_url, referenced_contexts)  # type: ignore[arg-type]
        else:
            self._clear()
|
||||
|
||||
def _accept_term(self, key: str) -> bool:
|
||||
if self.version < 1.1:
|
||||
return True
|
||||
if key and len(key) > 1 and key[0] == "@" and key[1].isalnum():
|
||||
return key in NODE_KEYS
|
||||
else:
|
||||
return True
|
||||
|
||||
def _prep_sources(
|
||||
self,
|
||||
base: Optional[str],
|
||||
inputs: Union[List[Union[Dict[str, Any], str, None]], List[str]],
|
||||
sources: List[Tuple[Optional[str], Union[Dict[str, Any], str, None]]],
|
||||
referenced_contexts: Set[str],
|
||||
in_source_url: Optional[str] = None,
|
||||
):
|
||||
for source in inputs:
|
||||
source_url = in_source_url
|
||||
new_base = base
|
||||
if isinstance(source, str):
|
||||
source_url = source
|
||||
source_doc_base = base or self.doc_base
|
||||
new_ctx = self._fetch_context(
|
||||
source, source_doc_base, referenced_contexts
|
||||
)
|
||||
if new_ctx is None:
|
||||
continue
|
||||
else:
|
||||
if base:
|
||||
if TYPE_CHECKING:
|
||||
# if base is not None, then source_doc_base won't be
|
||||
# none due to how it is assigned.
|
||||
assert source_doc_base is not None
|
||||
new_base = urljoin(source_doc_base, source_url)
|
||||
source = new_ctx
|
||||
|
||||
if isinstance(source, dict):
|
||||
if CONTEXT in source:
|
||||
source = source[CONTEXT]
|
||||
# type ignore: Incompatible types in assignment (expression has type "List[Union[Dict[str, Any], str, None]]", variable has type "Union[Dict[str, Any], str, None]")
|
||||
source = source if isinstance(source, list) else [source] # type: ignore[assignment]
|
||||
|
||||
if isinstance(source, list):
|
||||
# type error: Statement is unreachable
|
||||
self._prep_sources( # type: ignore[unreachable]
|
||||
new_base, source, sources, referenced_contexts, source_url
|
||||
)
|
||||
else:
|
||||
sources.append((source_url, source))
|
||||
|
||||
def _fetch_context(
|
||||
self, source: str, base: Optional[str], referenced_contexts: Set[str]
|
||||
):
|
||||
# type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]"
|
||||
source_url = urljoin(base, source) # type: ignore[type-var]
|
||||
|
||||
if source_url in referenced_contexts:
|
||||
raise RECURSIVE_CONTEXT_INCLUSION
|
||||
|
||||
# type error: Argument 1 to "add" of "set" has incompatible type "Optional[str]"; expected "str"
|
||||
referenced_contexts.add(source_url) # type: ignore[arg-type]
|
||||
|
||||
if source_url in self._context_cache:
|
||||
return self._context_cache[source_url]
|
||||
|
||||
# type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "str")
|
||||
source_json, _ = source_to_json(source_url)
|
||||
if source_json and CONTEXT not in source_json:
|
||||
raise INVALID_REMOTE_CONTEXT
|
||||
|
||||
# type error: Invalid index type "Optional[str]" for "Dict[str, Any]"; expected type "str"
|
||||
self._context_cache[source_url] = source_json # type: ignore[index]
|
||||
|
||||
return source_json
|
||||
|
||||
def _read_source(
    self,
    source: Dict[str, Any],
    source_url: Optional[str] = None,
    referenced_contexts: Optional[Set[str]] = None,
):
    """Apply one context definition to this context.

    An ``@import`` entry is fetched with ``_fetch_context`` and used as the
    base definition, with the entries of ``source`` overriding it. The
    vocabulary, version, propagate flag, language and base are then updated
    and every remaining entry is registered through ``_read_term``.

    :param source: the context definition to read.
    :param source_url: URL the definition was loaded from, if any; ``@base``
        is only honoured when this is empty and nothing was imported.
    :param referenced_contexts: URLs already loaded, used by
        ``_fetch_context`` to detect recursion.
    :raises JSONLDException: ``INVALID_CONTEXT_ENTRY`` when ``@import`` is
        not a string or the imported document or its ``@context`` is not a
        dictionary.
    """
    imports = source.get(IMPORT)
    if imports:
        if not isinstance(imports, str):
            raise INVALID_CONTEXT_ENTRY

        imported = self._fetch_context(
            imports, self.base, referenced_contexts or set()
        )
        if not isinstance(imported, dict):
            raise INVALID_CONTEXT_ENTRY

        imported_context = imported.get(CONTEXT)
        if not isinstance(imported_context, dict):
            raise INVALID_CONTEXT_ENTRY

        # Merge into a copy: the imported document is shared through
        # ``_context_cache`` and must not pick up the importer's entries.
        merged = dict(imported_context)
        merged.update(source)
        source = merged

    self.vocab = source.get(VOCAB, self.vocab)
    self.version = source.get(VERSION, self.version)
    protected = source.get(PROTECTED, False)

    for key, value in source.items():
        if key in {VOCAB, VERSION, IMPORT, PROTECTED}:
            # already handled above
            continue
        elif key == PROPAGATE and isinstance(value, bool):
            self.propagate = value
        elif key == LANG:
            self.language = value
        elif key == BASE:
            if not source_url and not imports:
                self.base = value
        else:
            self._read_term(source, key, value, protected)
|
||||
|
||||
def _read_term(
    self,
    source: Dict[str, Any],
    name: str,
    dfn: Union[Dict[str, Any], str],
    protected: bool = False,
) -> None:
    """Register the term ``name`` from its definition ``dfn``.

    Dictionary definitions are read key by key; string definitions are
    expanded with ``_rec_expand`` (and silently dropped when rejected by
    ``_accept_term``); any other value registers the term with no id.
    Afterwards the keyword alias table ``_alias`` is brought up to date.
    """
    idref = None
    if not isinstance(dfn, dict):
        if isinstance(dfn, str):
            if not self._accept_term(dfn):
                return
            idref = self._rec_expand(source, dfn)
        # type error: Argument 2 to "add_term" of "Context" has incompatible type "Optional[str]"; expected "str"
        self.add_term(name, idref, protected=protected)  # type: ignore[arg-type]
    else:
        rev = dfn.get(REV)
        protected = dfn.get(PROTECTED, protected)

        coercion = dfn.get(TYPE, UNDEF)
        if coercion and coercion not in (ID, TYPE, VOCAB):
            coercion = self._rec_expand(source, coercion)

        idref = rev or dfn.get(ID, UNDEF)
        if idref == TYPE:
            idref = str(RDF.type)
            coercion = VOCAB
        elif idref is not UNDEF:
            idref = self._rec_expand(source, idref)
        elif ":" in name:
            idref = self._rec_expand(source, name)
        elif self.vocab:
            idref = self.vocab + name

        self.add_term(
            name,
            idref,
            coercion,
            dfn.get(CONTAINER, UNDEF),
            dfn.get(INDEX, UNDEF),
            dfn.get(LANG, UNDEF),
            bool(rev),
            dfn.get(CONTEXT, UNDEF),
            dfn.get(PREFIX),
            protected=protected,
        )

    if idref in NODE_KEYS:
        self._alias.setdefault(idref, []).append(name)
        return

    # undo aliases that may have been inherited from parent context
    for alias_names in self._alias.values():
        if name in alias_names:
            alias_names.remove(name)
|
||||
|
||||
def _rec_expand(
    self, source: Dict[str, Any], expr: Optional[str], prev: Optional[str] = None
) -> Optional[str]:
    """Expand ``expr`` repeatedly until it stops changing.

    Prefixes and terms are looked up in ``source`` and in the already
    defined terms. Expansion ends when the result equals the previous step,
    is one of ``NODE_KEYS``, or has been completed with the vocabulary.
    """
    if expr == prev or expr in NODE_KEYS:
        return expr

    nxt: Optional[str]
    # type error: Argument 1 to "_prep_expand" of "Context" has incompatible type "Optional[str]"; expected "str"
    is_term, pfx, nxt = self._prep_expand(expr)  # type: ignore[arg-type]
    if not pfx:
        nxt = self._get_source_id(source, nxt) or nxt
        if ":" not in nxt and self.vocab:
            return self.vocab + nxt
    else:
        iri = self._get_source_id(source, pfx)
        if iri is None:
            if pfx + ":" == self.vocab:
                return expr
            known_term = self.terms.get(pfx)
            if known_term:
                iri = known_term.id
        nxt = expr if iri is None else iri + nxt

    return self._rec_expand(source, nxt, expr)
|
||||
|
||||
def _prep_expand(self, expr: str) -> Tuple[bool, Optional[str], str]:
|
||||
if ":" not in expr:
|
||||
return True, None, expr
|
||||
pfx, local = expr.split(":", 1)
|
||||
if not local.startswith("//"):
|
||||
return False, pfx, local
|
||||
else:
|
||||
return False, None, expr
|
||||
|
||||
def _get_source_id(self, source: Dict[str, Any], key: str) -> Optional[str]:
|
||||
# .. from source dict or if already defined
|
||||
term = source.get(key)
|
||||
if term is None:
|
||||
dfn = self.terms.get(key)
|
||||
if dfn:
|
||||
term = dfn.id
|
||||
elif isinstance(term, dict):
|
||||
term = term.get(ID)
|
||||
return term
|
||||
|
||||
def _term_dict(self, term: Term) -> Union[Dict[str, Any], str]:
    """Build the JSON form of ``term``'s definition.

    Returns the bare id string when the id is the only thing to state.
    """
    definition: Dict[str, Any] = {}
    if term.type != UNDEF:
        definition[TYPE] = self.shrink_iri(term.type)
    if term.container:
        definition[CONTAINER] = list(term.container)
    if term.language != UNDEF:
        definition[LANG] = term.language
    definition[REV if term.reverse else ID] = term.id

    if len(definition) == 1 and ID in definition:
        return definition[ID]
    return definition
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
    """
    Returns a dictionary representation of the context that can be
    serialized to JSON.

    Prefix mappings come first, then the term definitions, then ``@base``
    and ``@language`` when they are set.

    :return: a dictionary representation of the context.
    """
    result: Dict[str, Any] = {}
    for namespace, prefix in self._prefixes.items():
        result[prefix] = namespace
    for term in self._lookup.values():
        result[term.name] = self._term_dict(term)
    if self.base:
        result[BASE] = self.base
    if self.language:
        result[LANG] = self.language
    return result
|
||||
|
||||
|
||||
# Record describing one term definition of a context.
Term = namedtuple(
    "Term",
    [
        "id",
        "name",
        "type",
        "container",
        "index",
        "language",
        "reverse",
        "context",
        "prefix",
        "protected",
    ],
)

# Defaults cover the last eight fields, ``type`` through ``protected``;
# ``id`` and ``name`` stay required.
Term.__new__.__defaults__ = (
    UNDEF,  # type
    UNDEF,  # container
    UNDEF,  # index
    UNDEF,  # language
    False,  # reverse
    UNDEF,  # context
    False,  # prefix
    False,  # protected
)
|
||||
@@ -0,0 +1,9 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/errors.py
class JSONLDException(ValueError):  # noqa: N818
    """Error raised for invalid JSON-LD input."""


# http://www.w3.org/TR/json-ld-api/#idl-def-JsonLdErrorCode.{code-message}
# Pre-built exception instances, raised as-is by the context code.
RECURSIVE_CONTEXT_INCLUSION = JSONLDException("recursive context inclusion")
INVALID_REMOTE_CONTEXT = JSONLDException("invalid remote context")
INVALID_CONTEXT_ENTRY = JSONLDException("invalid context entry")
|
||||
@@ -0,0 +1,24 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/keys.py
# JSON-LD keywords, one constant per keyword.
BASE = "@base"
CONTAINER = "@container"
CONTEXT = "@context"
# DIRECTION = u'@direction'
GRAPH = "@graph"
ID = "@id"
IMPORT = "@import"
INCLUDED = "@included"
INDEX = "@index"
JSON = "@json"
LANGUAGE = "@language"
LANG = LANGUAGE  # short alias
LIST = "@list"
NEST = "@nest"
NONE = "@none"
PREFIX = "@prefix"
PROPAGATE = "@propagate"
PROTECTED = "@protected"
REVERSE = "@reverse"
REV = REVERSE  # short alias
SET = "@set"
TYPE = "@type"
VALUE = "@value"
VERSION = "@version"
VOCAB = "@vocab"
|
||||
@@ -0,0 +1,355 @@
|
||||
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/util.py
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
from html.parser import HTMLParser
|
||||
from io import StringIO, TextIOBase, TextIOWrapper
|
||||
from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, TextIO, Tuple, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import json
|
||||
else:
|
||||
try:
|
||||
import json
|
||||
|
||||
assert json # workaround for pyflakes issue #13
|
||||
except ImportError:
|
||||
import simplejson as json
|
||||
|
||||
from posixpath import normpath, sep
|
||||
from typing import TYPE_CHECKING, cast
|
||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
|
||||
from rdflib.parser import (
|
||||
BytesIOWrapper,
|
||||
InputSource,
|
||||
PythonInputSource,
|
||||
StringInputSource,
|
||||
URLInputSource,
|
||||
create_input_source,
|
||||
)
|
||||
|
||||
|
||||
def source_to_json(
    source: Optional[
        Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]
    ],
    fragment_id: Optional[str] = None,
    extract_all_scripts: Optional[bool] = False,
) -> Tuple[Union[Dict, List[Dict]], Any]:
    """Extract JSON from a source document.

    The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
    To process as HTML ``source.content_type`` must be set to "text/html" or "application/xhtml+xml".

    :param source: the input source document (JSON or HTML)

    :param fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None

    :param extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

    :return: Tuple with the extracted JSON document and value of the HTML base element

    :raises ValueError: if the source offers neither a byte stream nor a character stream
    """

    if isinstance(source, PythonInputSource):
        return source.data, None

    if isinstance(source, StringInputSource):
        # A StringInputSource is assumed to be never a HTMLJSON doc
        html_base: Any = None
        # We can get the original string from the StringInputSource
        # It's hidden in the BytesIOWrapper 'wrapped' attribute
        b_stream = source.getByteStream()
        original_string: Optional[str] = None
        json_dict: Union[Dict, List[Dict]]
        if isinstance(b_stream, BytesIOWrapper):
            wrapped_inner = cast(Union[str, StringIO, TextIOBase], b_stream.wrapped)
            if isinstance(wrapped_inner, str):
                original_string = wrapped_inner
            elif isinstance(wrapped_inner, StringIO):
                original_string = wrapped_inner.getvalue()
        if _HAS_ORJSON:
            if original_string is not None:
                json_dict = orjson.loads(original_string)
            elif isinstance(b_stream, BytesIOWrapper):
                # use the CharacterStream instead
                c_stream = source.getCharacterStream()
                json_dict = orjson.loads(c_stream.read())
            else:
                # orjson assumes its in utf-8 encoding so
                # don't bother to check the source.getEncoding()
                json_dict = orjson.loads(b_stream.read())
        else:
            if original_string is not None:
                json_dict = json.loads(original_string)
            else:
                json_dict = json.load(source.getCharacterStream())
        return json_dict, html_base

    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    source = create_input_source(source, format="json-ld")
    try:
        content_type = source.content_type
    except (AttributeError, LookupError):
        content_type = None

    is_html = content_type is not None and content_type.lower() in (
        "text/html",
        "application/xhtml+xml",
    )
    if is_html:
        html_docparser: Optional[HTMLJSONParser] = HTMLJSONParser(
            fragment_id=fragment_id, extract_all_scripts=extract_all_scripts
        )
    else:
        html_docparser = None
    try:
        b_stream = source.getByteStream()
    except (AttributeError, LookupError):
        b_stream = None
    try:
        c_stream = source.getCharacterStream()
    except (AttributeError, LookupError):
        c_stream = None
    if b_stream is None and c_stream is None:
        raise ValueError(
            f"Source does not have a character stream or a byte stream and cannot be used {type(source)}"
        )
    try:
        b_encoding: Optional[str] = None if b_stream is None else source.getEncoding()
    except (AttributeError, LookupError):
        b_encoding = None
    underlying_string: Optional[str] = None
    if b_stream is not None and isinstance(b_stream, BytesIOWrapper):
        # Try to find an underlying wrapped Unicode string to use?
        wrapped_inner = b_stream.wrapped
        if isinstance(wrapped_inner, str):
            underlying_string = wrapped_inner
        elif isinstance(wrapped_inner, StringIO):
            underlying_string = wrapped_inner.getvalue()
    try:
        if is_html and html_docparser is not None:
            # Offload parsing to the HTMLJSONParser
            if underlying_string is not None:
                html_string: str = underlying_string
            elif c_stream is not None:
                html_string = c_stream.read()
            else:
                if TYPE_CHECKING:
                    assert b_stream is not None
                if b_encoding is None:
                    b_encoding = "utf-8"
                html_string = TextIOWrapper(b_stream, encoding=b_encoding).read()
            html_docparser.feed(html_string)
            json_dict, html_base = html_docparser.get_json(), html_docparser.get_base()
        elif _HAS_ORJSON:
            html_base = None
            if underlying_string is not None:
                json_dict = orjson.loads(underlying_string)
            elif (
                (b_stream is not None and isinstance(b_stream, BytesIOWrapper))
                or b_stream is None
            ) and c_stream is not None:
                # use the CharacterStream instead
                json_dict = orjson.loads(c_stream.read())
            else:
                if TYPE_CHECKING:
                    assert b_stream is not None
                # b_stream is not None
                json_dict = orjson.loads(b_stream.read())
        else:
            html_base = None
            if underlying_string is not None:
                # Fall through to the common return so that the result is
                # the documented (json, html_base) tuple on this path too.
                json_dict = json.loads(underlying_string)
            else:
                if c_stream is not None:
                    use_stream = c_stream
                else:
                    if TYPE_CHECKING:
                        assert b_stream is not None
                    # b_stream is not None
                    if b_encoding is None:
                        b_encoding = "utf-8"
                    use_stream = TextIOWrapper(b_stream, encoding=b_encoding)
                json_dict = json.load(use_stream)
        return json_dict, html_base
    finally:
        # Always release both streams; some stream objects lack close().
        if b_stream is not None:
            try:
                b_stream.close()
            except AttributeError:
                pass
        if c_stream is not None:
            try:
                c_stream.close()
            except AttributeError:
                pass
|
||||
|
||||
|
||||
# Characters that may end the namespace part of an IRI, in order of preference.
VOCAB_DELIMS = ("#", "/", ":")


def split_iri(iri: str) -> Tuple[str, Optional[str]]:
    """Split ``iri`` into ``(namespace, local name)``.

    The split happens after the last occurrence of the first delimiter from
    ``VOCAB_DELIMS`` that is present. An IRI without any delimiter is
    returned whole, with ``None`` as its local name.
    """
    for delim in VOCAB_DELIMS:
        head, found, tail = iri.rpartition(delim)
        if found:
            return head + found, tail
    return iri, None
|
||||
|
||||
|
||||
def norm_url(base: str, url: str) -> str:
    """
    Resolve ``url`` against ``base`` and normalize the path of the result.

    URLs that already carry a scheme are returned unchanged. ``urn:`` and
    ``urn-x:`` bases get their own path joining. A trailing ``#`` on ``url``
    is kept even though URL joining drops empty fragments.

    >>> norm_url('http://example.org/', '/one')
    'http://example.org/one'
    >>> norm_url('http://example.org/', '/one#')
    'http://example.org/one#'
    >>> norm_url('http://example.org/one', 'two')
    'http://example.org/two'
    >>> norm_url('http://example.org/one/', 'two')
    'http://example.org/one/two'
    >>> norm_url('http://example.org/', 'http://example.net/one')
    'http://example.net/one'
    >>> norm_url('http://example.org/', 'http://example.org//one')
    'http://example.org//one'
    >>> norm_url('http://example.org', '#frag')
    'http://example.org#frag'
    """
    if "://" in url:
        return url

    # Fix for URNs
    parsed_base = urlsplit(base)
    parsed_url = urlsplit(url)
    if parsed_url.scheme:
        # Assume full URL
        return url
    if parsed_base.scheme in ("urn", "urn-x"):
        # No scheme -> assume relative and join paths
        base_path_parts = parsed_base.path.split("/", 1)
        base_path = "/" + (base_path_parts[1] if len(base_path_parts) > 1 else "")
        joined_path = urljoin(base_path, parsed_url.path)
        fragment = f"#{parsed_url.fragment}" if parsed_url.fragment else ""
        result = f"{parsed_base.scheme}:{base_path_parts[0]}{joined_path}{fragment}"
    else:
        parts = urlsplit(urljoin(base, url))
        raw_path = parts[2]
        # normpath("") is ".", which would turn "http://host" into
        # "http://host/." -- leave an empty path empty.
        path = normpath(raw_path) if raw_path else ""
        if sep != "/":
            path = "/".join(path.split(sep))
        if raw_path.endswith("/") and not path.endswith("/"):
            # normpath strips the trailing slash; put it back
            path += "/"
        result = urlunsplit(parts[0:2] + (path,) + parts[3:])
    if url.endswith("#") and not result.endswith("#"):
        result += "#"
    return result
|
||||
|
||||
|
||||
def context_from_urlinputsource(source: URLInputSource) -> Optional[str]:
    """
    Return the context URL advertised in the ``Link`` headers of ``source``.

    Please note that JSON-LD documents served with the application/ld+json media type
    MUST have all context information, including references to external contexts,
    within the body of the document. Contexts linked via a
    http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
    ignored for such documents.

    :param source: the input source whose ``content_type``, ``links`` and
        ``url`` are inspected.
    :return: the link target resolved against ``source.url``, or ``None``
        when the document is ``application/ld+json``, the source exposes no
        ``links``, or no well-formed context link is present.
    """
    if source.content_type == "application/ld+json":
        return None
    try:
        # source.links is the new way of getting Link headers from URLInputSource
        links = source.links
    except AttributeError:
        return None
    for link in links:
        if ' rel="http://www.w3.org/ns/json-ld#context"' in link:
            # str.find returns -1 for a missing bracket, where str.index
            # would raise ValueError on a malformed header.
            start, end = link.find("<"), link.find(">")
            if -1 < start < end:
                # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]"
                return urljoin(source.url, link[start + 1 : end])  # type: ignore[type-var]
    return None
|
||||
|
||||
|
||||
# Names exported by ``from ... import *``: the JSON backends in use and the
# helper functions defined in this module.
__all__ = [
    "json",
    "source_to_json",
    "split_iri",
    "norm_url",
    "context_from_urlinputsource",
    "orjson",
    "_HAS_ORJSON",
]
|
||||
|
||||
|
||||
class HTMLJSONParser(HTMLParser):
    """Collect JSON-LD embedded in ``<script type="application/ld+json">`` elements.

    Parsed documents accumulate in ``json`` (top-level lists are flattened
    into it) and the ``href`` of a ``<base>`` element is kept in ``base``.

    :param fragment_id: when given, scripts whose ``id`` attribute differs
        from it are ignored.
    :param extract_all_scripts: when false, only the first matching script
        is extracted.
    """

    def __init__(
        self,
        fragment_id: Optional[str] = None,
        extract_all_scripts: Optional[bool] = False,
    ):
        super().__init__()
        self.fragment_id = fragment_id
        self.json: List[Dict] = []
        self.contains_json = False
        self.fragment_id_does_not_match = False
        self.base = None
        self.extract_all_scripts = extract_all_scripts
        self.script_count = 0

    def handle_starttag(self, tag, attrs):
        """Note whether the element just opened is a wanted JSON-LD script."""
        self.contains_json = False
        self.fragment_id_does_not_match = False

        # Only set self. contains_json to True if the
        # type is 'application/ld+json'
        if tag == "script":
            for attr, value in attrs:
                if attr == "type" and value == "application/ld+json":
                    self.contains_json = True
                elif attr == "id" and self.fragment_id and value != self.fragment_id:
                    self.fragment_id_does_not_match = True

        elif tag == "base":
            for attr, value in attrs:
                if attr == "href":
                    self.base = value

    def handle_endtag(self, tag):
        """Stop treating character data as JSON once the script is closed."""
        if tag == "script":
            # Without this reset, text between ``</script>`` and the next
            # start tag would be handed to the JSON decoder.
            self.contains_json = False
            self.fragment_id_does_not_match = False

    def handle_data(self, data):
        """Parse the content of a wanted JSON-LD script and store the result."""
        # Only do something when we know the context is a
        # script element containing application/ld+json
        if not self.contains_json or self.fragment_id_does_not_match:
            return

        if not self.extract_all_scripts and self.script_count > 0:
            return

        if data.strip() == "":
            # skip empty data elements
            return

        # Try to parse the json
        if _HAS_ORJSON:
            # orjson can load a unicode string
            # if that's the only thing we have,
            # its not worth encoding it to bytes
            parsed = orjson.loads(data)
        else:
            parsed = json.loads(data)

        # Add to the result document
        if isinstance(parsed, list):
            self.json.extend(parsed)
        else:
            self.json.append(parsed)

        self.script_count += 1

    def get_json(self) -> List[Dict]:
        """Return the JSON documents collected so far."""
        return self.json

    def get_base(self):
        """Return the ``href`` of the last ``<base>`` element seen, if any."""
        return self.base
|
||||
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
SPARQL implementation for RDFLib
|
||||
|
||||
.. versionadded:: 4.0
|
||||
"""
|
||||
|
||||
from importlib.metadata import entry_points
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
SPARQL_LOAD_GRAPHS = True
"""
If True, using FROM <uri> and FROM NAMED <uri>
will load/parse more data
"""


SPARQL_DEFAULT_GRAPH_UNION = True
"""
If True - the default graph in the RDF Dataset is the union of all
named graphs (like RDFLib's ConjunctiveGraph)
"""


CUSTOM_EVALS = {}
"""
Custom evaluation functions

These must be functions taking (ctx, part) and raise
NotImplementedError if they cannot handle a certain part
"""


PLUGIN_ENTRY_POINT = "rdf.plugins.sparqleval"


from . import operators, parser, parserutils
from .processor import prepareQuery, prepareUpdate, processUpdate

# Reference the submodules so that linters do not flag the imports as unused.
assert parser
assert operators
assert parserutils


# Register every custom evaluation function published under the plugin
# entry point group.
all_entry_points = entry_points()
if not hasattr(all_entry_points, "select"):
    # Prior to Python 3.10, this returns a dict instead of the selection interface
    if TYPE_CHECKING:
        assert isinstance(all_entry_points, dict)
    _custom_eval_entry_points = all_entry_points.get(PLUGIN_ENTRY_POINT, [])
else:
    _custom_eval_entry_points = all_entry_points.select(group=PLUGIN_ENTRY_POINT)

for ep in _custom_eval_entry_points:
    CUSTOM_EVALS[ep.name] = ep.load()


__all__ = [
    "prepareQuery",
    "prepareUpdate",
    "processUpdate",
    "operators",
    "parser",
    "parserutils",
    "CUSTOM_EVALS",
]
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Aggregation functions
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
from rdflib.plugins.sparql.datatypes import type_promotion
|
||||
from rdflib.plugins.sparql.evalutils import _eval, _val
|
||||
from rdflib.plugins.sparql.operators import numeric
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings, NotBoundError, SPARQLTypeError
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class Accumulator:
    """abstract base class for different aggregation functions"""

    def __init__(self, aggregation: CompValue):
        # Subclasses provide get_value() and update(); only declared here.
        self.get_value: Callable[[], Optional[Literal]]
        self.update: Callable[[FrozenBindings, Aggregator], None]
        self.var = aggregation.res
        self.expr = aggregation.vars
        if aggregation.distinct:
            self.distinct = aggregation.distinct
            # values already counted, consulted by use_row()
            self.seen: Set[Any] = set()
        else:
            # Without DISTINCT every row counts, so bypass the seen-set test.
            # type error: Cannot assign to a method
            self.use_row = self.dont_care  # type: ignore[method-assign]
            self.distinct = False

    def dont_care(self, row: FrozenBindings) -> bool:
        """skips distinct test"""
        return True

    def use_row(self, row: FrozenBindings) -> bool:
        """tests distinct with set"""
        already_seen = _eval(self.expr, row) in self.seen
        return not already_seen

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        """sets final value in bindings"""
        final_value = self.get_value()
        # type error: Incompatible types in assignment (expression has type "Optional[Literal]", target has type "Identifier")
        bindings[self.var] = final_value  # type: ignore[assignment]
|
||||
|
||||
|
||||
class Counter(Accumulator):
    """Accumulator that counts the rows for which the expression is bound."""

    def __init__(self, aggregation: CompValue):
        super(Counter, self).__init__(aggregation)
        self.value = 0
        if self.expr == "*":
            # cannot eval "*" => always use the full row
            # type error: Cannot assign to a method
            self.eval_row = self.eval_full_row  # type: ignore[assignment]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Count ``row`` unless its expression is unbound."""
        try:
            counted = self.eval_row(row)
        except NotBoundError:
            # skip UNDEF
            return
        else:
            self.value += 1
            if self.distinct:
                self.seen.add(counted)

    def get_value(self) -> Literal:
        """Return the count as a literal."""
        return Literal(self.value)

    def eval_row(self, row: FrozenBindings) -> Identifier:
        """Evaluate the counted expression for ``row``."""
        return _eval(self.expr, row)

    def eval_full_row(self, row: FrozenBindings) -> FrozenBindings:
        """Stand-in for ``eval_row`` when counting ``*``: the row itself."""
        return row

    def use_row(self, row: FrozenBindings) -> bool:
        """Distinct test that treats an unbound expression as "skip"."""
        try:
            value = self.eval_row(row)
        except NotBoundError:
            # happens when counting zero optional nodes. See issue #2229
            return False
        return value not in self.seen
|
||||
|
||||
|
||||
@overload
def type_safe_numbers(*args: int) -> Tuple[int, ...]: ...


@overload
def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[float, int], ...]: ...


def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[float, int], ...]:
    """Make ``args`` safe to add together.

    ``Decimal`` and ``float`` cannot be combined arithmetically, so when
    both are present every argument is converted to ``float``. Otherwise
    the arguments are returned unchanged.

    :return: a tuple with one entry per argument, in the original order.
    """
    has_float = any(isinstance(arg, float) for arg in args)
    has_decimal = any(isinstance(arg, Decimal) for arg in args)
    if has_float and has_decimal:
        return tuple(float(arg) for arg in args)
    # type error: Incompatible return value type (got "Tuple[Union[Decimal, float, int], ...]", expected "Tuple[Union[float, int], ...]")
    # NOTE on type error: Decimal values do reach this point when no float
    # is present; they are passed through unchanged on purpose.
    return args  # type: ignore[return-value]
|
||||
|
||||
|
||||
class Sum(Accumulator):
    """Accumulator that adds up the numeric values of its expression."""

    def __init__(self, aggregation: CompValue):
        super(Sum, self).__init__(aggregation)
        self.value = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Add the value of ``row`` to the total and promote the datatype."""
        try:
            value = _eval(self.expr, row)
            if self.datatype is None:
                promoted = value.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(self.datatype, value.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            self.value = sum(type_safe_numbers(self.value, numeric(value)))
            if self.distinct:
                self.seen.add(value)
        except NotBoundError:
            # skip UNDEF
            pass

    def get_value(self) -> Literal:
        """Return the total as a literal of the promoted datatype."""
        return Literal(self.value, datatype=self.datatype)
|
||||
|
||||
|
||||
class Average(Accumulator):
    """Accumulator that computes the mean of the numeric values of its expression."""

    def __init__(self, aggregation: CompValue):
        super(Average, self).__init__(aggregation)
        self.counter = 0
        self.sum = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Fold the value of ``row`` into the running sum and count."""
        try:
            value = _eval(self.expr, row)
            previous_datatype = self.datatype
            self.sum = sum(type_safe_numbers(self.sum, numeric(value)))
            if previous_datatype is None:
                promoted = value.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(previous_datatype, value.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            if self.distinct:
                self.seen.add(value)
            self.counter += 1
        # skip UNDEF or BNode => SPARQLTypeError
        except (NotBoundError, SPARQLTypeError):
            pass

    def get_value(self) -> Literal:
        """Return the mean: zero for no rows, float for float/double input, Decimal otherwise."""
        if self.counter == 0:
            return Literal(0)
        if self.datatype in (XSD.float, XSD.double):
            mean = self.sum / self.counter
        else:
            mean = Decimal(self.sum) / Decimal(self.counter)
        return Literal(mean)
|
||||
|
||||
|
||||
class Extremum(Accumulator):
    """abstract base class for Minimum and Maximum"""

    def __init__(self, aggregation: CompValue):
        # declared only; subclasses provide the implementation
        self.compare: Callable[[Any, Any], Any]
        super().__init__(aggregation)
        # best value seen so far; None until the first eligible row
        self.value: Any = None
        # DISTINCT would not change the value for MIN or MAX
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        """Bind the result variable, unless no value was ever recorded."""
        if self.value is None:
            # simply do not set if self.value is still None
            return
        bindings[self.var] = Literal(self.value)

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Compare the expression value for *row* with the current extremum."""
        try:
            candidate = _eval(self.expr, row)
            if self.value is None:
                self.value = candidate
            else:
                # self.compare is implemented by Minimum/Maximum
                self.value = self.compare(self.value, candidate)
        except (NotBoundError, SPARQLTypeError):
            # skip UNDEF or BNode => SPARQLTypeError
            pass
|
||||
|
||||
|
||||
# Type variable constrained to the RDF term classes; it annotates the
# ``compare`` methods of Minimum and Maximum so both operands and the result
# share one term type.
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
class Minimum(Extremum):
    """Extremum that keeps the smaller of two values, ordered by ``_val``."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        """Return whichever of *val1* / *val2* has the smaller ``_val`` key."""
        candidates = (val1, val2)
        return min(candidates, key=_val)
|
||||
|
||||
|
||||
class Maximum(Extremum):
    """Extremum that keeps the larger of two values, ordered by ``_val``."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        """Return whichever of *val1* / *val2* has the larger ``_val`` key."""
        candidates = (val1, val2)
        return max(candidates, key=_val)
|
||||
|
||||
|
||||
class Sample(Accumulator):
    """takes the first eligible value"""

    def __init__(self, aggregation):
        super().__init__(aggregation)
        # DISTINCT would not change the value
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Bind the first evaluable value, then unregister this accumulator."""
        try:
            sampled = _eval(self.expr, row)
            # set the value now
            aggregator.bindings[self.var] = sampled
            # and skip this accumulator for future rows
            del aggregator.accumulators[self.var]
        except NotBoundError:
            pass

    def get_value(self) -> None:
        """Return ``None``: reached only when no value was ever sampled."""
        # set None if no value was set
        return None
|
||||
|
||||
|
||||
class GroupConcat(Accumulator):
    """Accumulator that collects values and joins them with a separator."""

    value: List[Literal]

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        # only GROUPCONCAT needs to have a list as accumulator
        self.value = []
        separator = aggregation.separator
        # a single space is used when the query gives no separator
        self.separator = " " if separator is None else separator

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Append the expression value for *row* to the collected values."""
        try:
            value = _eval(self.expr, row)
            # skip UNDEF
            if isinstance(value, NotBoundError):
                return
            self.value.append(value)
            if self.distinct:
                self.seen.add(value)
        # skip UNDEF
        # NOTE: It seems like this is not the way undefined values occur, they
        # come through not as exceptions but as values. This is left here
        # however as it may occur in some cases.
        # TODO: Consider removing this.
        except NotBoundError:
            pass

    def get_value(self) -> Literal:
        """Return the string forms of the collected values, joined."""
        return Literal(self.separator.join(map(str, self.value)))
|
||||
|
||||
|
||||
class Aggregator:
    """combines different Accumulator objects"""

    # maps the algebra node name of an aggregate to its accumulator class
    accumulator_classes = {
        "Aggregate_Count": Counter,
        "Aggregate_Sample": Sample,
        "Aggregate_Sum": Sum,
        "Aggregate_Avg": Average,
        "Aggregate_Min": Minimum,
        "Aggregate_Max": Maximum,
        "Aggregate_GroupConcat": GroupConcat,
    }

    def __init__(self, aggregations: List[CompValue]):
        """Create one accumulator per aggregation, keyed by ``aggregation.res``.

        :raises Exception: if an aggregation name has no accumulator class.
        """
        self.bindings: Dict[Variable, Identifier] = {}
        self.accumulators: Dict[str, Accumulator] = {}
        for aggregation in aggregations:
            factory = self.accumulator_classes.get(aggregation.name)
            if factory is None:
                raise Exception("Unknown aggregate function " + aggregation.name)
            self.accumulators[aggregation.res] = factory(aggregation)

    def update(self, row: FrozenBindings) -> None:
        """update all own accumulators"""
        # SAMPLE accumulators may delete themselves
        # => iterate over a snapshot, not the live view
        for accumulator in tuple(self.accumulators.values()):
            if accumulator.use_row(row):
                accumulator.update(row, self)

    def get_bindings(self) -> Mapping[Variable, Identifier]:
        """calculate and set last values"""
        for accumulator in self.accumulators.values():
            accumulator.set_value(self.bindings)
        return self.bindings
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Utility functions for supporting the XML Schema Datatypes hierarchy
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
# The XSD datatypes known to this module.
XSD_DTs: Set[URIRef] = {
    XSD.integer,
    XSD.decimal,
    XSD.float,
    XSD.double,
    XSD.string,
    XSD.boolean,
    XSD.dateTime,
    XSD.nonPositiveInteger,
    XSD.negativeInteger,
    XSD.long,
    XSD.int,
    XSD.short,
    XSD.byte,
    XSD.nonNegativeInteger,
    XSD.unsignedLong,
    XSD.unsignedInt,
    XSD.unsignedShort,
    XSD.unsignedByte,
    XSD.positiveInteger,
    XSD.date,
}

# adding dateTime datatypes

XSD_DateTime_DTs = {XSD.dateTime, XSD.date, XSD.time}

XSD_Duration_DTs = {XSD.duration, XSD.dayTimeDuration, XSD.yearMonthDuration}

# datatype -> the datatypes treated as its sub-types
_sub_types: Dict[URIRef, List[URIRef]] = {
    XSD.integer: [
        XSD.nonPositiveInteger,
        XSD.negativeInteger,
        XSD.long,
        XSD.int,
        XSD.short,
        XSD.byte,
        XSD.nonNegativeInteger,
        XSD.positiveInteger,
        XSD.unsignedLong,
        XSD.unsignedInt,
        XSD.unsignedShort,
        XSD.unsignedByte,
    ],
}

# inverse of _sub_types: sub-type -> its super-type
_super_types: Dict[URIRef, URIRef] = {}
for superdt in XSD_DTs:
    for subdt in _sub_types.get(superdt, []):
        _super_types[subdt] = superdt

# we only care about float, double, integer, decimal
# _typePromotionMap[t1][t2] is the datatype resulting from combining t1 and t2
_typePromotionMap: Dict[URIRef, Dict[URIRef, URIRef]] = {
    XSD.float: {
        XSD.integer: XSD.float,
        XSD.decimal: XSD.float,
        XSD.double: XSD.double,
    },
    XSD.double: {
        XSD.integer: XSD.double,
        XSD.float: XSD.double,
        XSD.decimal: XSD.double,
    },
    XSD.decimal: {
        XSD.integer: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
    XSD.integer: {
        XSD.decimal: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
}
|
||||
|
||||
|
||||
def type_promotion(t1: URIRef, t2: Optional[URIRef]) -> URIRef:
    """Return the datatype that results from combining *t1* and *t2*.

    Both datatypes are first replaced by their super-type (when one is
    registered in ``_super_types``) and then looked up in
    ``_typePromotionMap``.

    :param t1: the first datatype.
    :param t2: the second datatype; ``None`` means "nothing to combine".
    :return: *t1* unchanged when *t2* is ``None``, the shared super-type when
        both collapse to the same one, otherwise the promoted datatype.
    :raises TypeError: if the promotion map has no entry for the pair.
    """
    if t2 is None:
        return t1
    left = _super_types.get(t1, t1)
    right = _super_types.get(t2, t2)
    if left == right:
        return left  # matching super-types
    try:
        if TYPE_CHECKING:
            # type assert because mypy is confused and thinks t2 can be None
            assert right is not None
        return _typePromotionMap[left][right]
    except KeyError:
        raise TypeError(
            "Operators cannot combine datatypes %s and %s" % (left, right)
        )
|
||||
@@ -0,0 +1,685 @@
|
||||
"""
|
||||
These methods recursively evaluate the SPARQL Algebra
|
||||
|
||||
evalQuery is the entry-point, it will setup context and
|
||||
return the SPARQLResult object
|
||||
|
||||
evalPart is called on each level and will delegate to the right method
|
||||
|
||||
A rdflib.plugins.sparql.sparql.QueryContext is passed along, keeping
|
||||
information needed for evaluation
|
||||
|
||||
A list of dicts (solution mappings) is returned, apart from GroupBy which may
|
||||
also return a dict of list of dicts
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Deque,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql import CUSTOM_EVALS, parser
|
||||
from rdflib.plugins.sparql.aggregates import Aggregator
|
||||
from rdflib.plugins.sparql.evalutils import (
|
||||
_ebv,
|
||||
_eval,
|
||||
_fillTemplate,
|
||||
_join,
|
||||
_minus,
|
||||
_val,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, value
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
AlreadyBound,
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
Query,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
import json
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
_Triple = Tuple[Identifier, Identifier, Identifier]
|
||||
|
||||
|
||||
def evalBGP(
    ctx: QueryContext, bgp: List[_Triple]
) -> Generator[FrozenBindings, None, None]:
    """
    A basic graph pattern

    Matches the first triple pattern against ``ctx.graph`` and recurses on
    the remaining patterns with the newly bound terms; an empty pattern list
    yields the current solution.
    """
    if not bgp:
        yield ctx.solution()
        return

    s, p, o = bgp[0]
    remaining = bgp[1:]

    _s = ctx[s]
    _p = ctx[p]
    _o = ctx[o]

    # a fresh binding scope is only needed when a position is still unbound
    has_unbound = None in (_s, _p, _o)

    # type error: Item "None" of "Optional[Graph]" has no attribute "triples"
    # type Argument 1 to "triples" of "Graph" has incompatible type "Tuple[Union[str, Path, None], Union[str, Path, None], Union[str, Path, None]]"; expected "Tuple[Optional[Node], Optional[Node], Optional[Node]]"
    for ss, sp, so in ctx.graph.triples((_s, _p, _o)):  # type: ignore[union-attr, arg-type]
        c = ctx.push() if has_unbound else ctx

        if _s is None:
            # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
            c[s] = ss  # type: ignore[assignment]

        # the same variable may occur in more than one position of the
        # pattern; a conflicting second binding discards this triple
        try:
            if _p is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[p] = sp  # type: ignore[assignment]
            if _o is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[o] = so  # type: ignore[assignment]
        except AlreadyBound:
            continue

        yield from evalBGP(c, remaining)
|
||||
|
||||
|
||||
def evalExtend(
    ctx: QueryContext, extend: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an Extend node: bind ``extend.var`` to ``extend.expr``.

    A solution whose expression evaluates to (or raises) a ``SPARQLError``
    is passed through without the extra binding.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY

    for solution in evalPart(ctx, extend.p):
        try:
            bound = _eval(
                extend.expr, solution.forget(ctx, _except=extend._vars)
            )
            if isinstance(bound, SPARQLError):
                raise bound

            yield solution.merge({extend.var: bound})

        except SPARQLError:
            yield solution
|
||||
|
||||
|
||||
def evalLazyJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """
    A lazy join will push the variables bound
    in the first part to the second part,
    essentially doing the join implicitly
    hopefully evaluating much fewer triples
    """
    for left in evalPart(ctx, join.p1):
        seeded = ctx.thaw(left)
        for right in evalPart(seeded, join.p2):
            # merge, as some bindings may have been forgotten
            yield right.merge(left)
|
||||
|
||||
|
||||
def evalJoin(ctx: QueryContext, join: CompValue) -> Generator[FrozenDict, None, None]:
    """Evaluate a Join node, lazily when ``join.lazy`` is set.

    Otherwise the right side is materialised into a set and joined with
    ``_join``.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY
    # only ever for join.p1

    if join.lazy:
        return evalLazyJoin(ctx, join)

    left = evalPart(ctx, join.p1)
    right = set(evalPart(ctx, join.p2))
    return _join(left, right)
|
||||
|
||||
|
||||
def evalUnion(ctx: QueryContext, union: CompValue) -> List[Any]:
    """Return the solutions of ``union.p1`` followed by those of ``union.p2``."""
    solutions = list(evalPart(ctx, union.p1))
    solutions.extend(evalPart(ctx, union.p2))
    return solutions
|
||||
|
||||
|
||||
def evalMinus(ctx: QueryContext, minus: CompValue) -> Generator[FrozenDict, None, None]:
    """Evaluate a Minus node via ``_minus``; the right side is materialised into a set."""
    return _minus(evalPart(ctx, minus.p1), set(evalPart(ctx, minus.p2)))
|
||||
|
||||
|
||||
def evalLeftJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate a LeftJoin (OPTIONAL) node.

    For every left solution, the right side is evaluated with the left
    bindings pushed in; matches passing ``join.expr`` are yielded. When there
    are none, the bare left solution is yielded, subject to the re-check
    described in the body.
    """
    for left in evalPart(ctx, join.p1):
        matched = False
        seeded = ctx.thaw(left)
        for candidate in evalPart(seeded, join.p2):
            if _ebv(join.expr, candidate.forget(ctx)):
                matched = True
                yield candidate
        if matched:
            continue

        # we've cheated, the ctx above may contain
        # vars bound outside our scope
        # before we yield a solution without the OPTIONAL part
        # check that we would have had no OPTIONAL matches
        # even without prior bindings...
        p1_vars = join.p1._vars
        if p1_vars is None or not any(
            _ebv(join.expr, retry)
            for retry in evalPart(ctx.thaw(left.remember(p1_vars)), join.p2)
        ):
            yield left
|
||||
|
||||
|
||||
def evalFilter(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield the solutions of ``part.p`` for which ``part.expr`` is true.

    Unless ``part.no_isolated_scope`` is set, the expression is evaluated
    against ``solution.forget(ctx, _except=part._vars)`` rather than the full
    solution.
    """
    # TODO: Deal with dict returned from evalPart!
    for solution in evalPart(ctx, part.p):
        if part.no_isolated_scope:
            scope = solution
        else:
            scope = solution.forget(ctx, _except=part._vars)
        if _ebv(part.expr, scope):
            yield solution
|
||||
|
||||
|
||||
def evalGraph(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate a Graph node (``GRAPH <term> { ... }``).

    When ``part.term`` is unbound, the inner pattern is evaluated once per
    context of the dataset (skipping the default context) and each solution
    is joined with a binding of the term to that graph's identifier.
    Otherwise only the context named by the term is queried.

    :raises Exception: if the query context has no dataset.
    """
    if ctx.dataset is None:
        raise Exception(
            "Non-conjunctive-graph doesn't know about "
            "graphs. Try a query without GRAPH."
        )

    ctx = ctx.clone()
    graph: Union[str, Path, None, Graph] = ctx[part.term]
    # every yielded solution gets its context's graph reset to this one
    prev_graph = ctx.graph

    if graph is not None:
        if TYPE_CHECKING:
            assert not isinstance(graph, Graph)
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Union[str, Path]"; expected "Union[Node, str, None]"
        c = ctx.pushGraph(ctx.dataset.get_context(graph))  # type: ignore[arg-type]
        for x in evalPart(c, part.p):
            x.ctx.graph = prev_graph
            yield x
        return

    for graph in ctx.dataset.contexts():
        # in SPARQL the default graph is NOT a named graph
        if graph == ctx.dataset.default_context:
            continue

        c = ctx.pushGraph(graph).push()
        graphSolution = [{part.term: graph.identifier}]
        for x in _join(evalPart(c, part.p), graphSolution):
            x.ctx.graph = prev_graph
            yield x
|
||||
|
||||
|
||||
def evalValues(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield one solution per row of an inline VALUES block.

    ``"UNDEF"`` cells leave their variable unbound; a row that conflicts with
    an existing binding (``AlreadyBound``) is dropped.
    """
    for row in part.p.res:
        scope = ctx.push()
        try:
            for var, term in row.items():
                if term != "UNDEF":
                    scope[var] = term
        except AlreadyBound:
            continue
        else:
            yield scope.solution()
|
||||
|
||||
|
||||
def evalMultiset(ctx: QueryContext, part: CompValue):
    """Evaluate a ToMultiSet node: inline VALUES data or a nested pattern."""
    inner = part.p
    if inner.name != "values":
        return evalPart(ctx, inner)
    return evalValues(ctx, part)
|
||||
|
||||
|
||||
def evalPart(ctx: QueryContext, part: CompValue) -> Any:
    """Dispatch an algebra node to the function that evaluates it.

    Registered custom evaluators (``CUSTOM_EVALS``) are tried first; one
    declines a node by raising ``NotImplementedError``. Otherwise the node is
    dispatched on ``part.name``.

    :raises Exception: if ``part.name`` is not a known algebra node.
    """
    # try custom evaluation functions
    for custom_eval in CUSTOM_EVALS.values():
        try:
            return custom_eval(ctx, part)
        except NotImplementedError:
            pass  # the given custom function did not handle this part

    kind = part.name

    if kind == "BGP":
        # Reorder triple patterns by number of unbound nodes in the current
        # ctx, so that patterns with more bound nodes are evaluated first
        triples = sorted(
            part.triples, key=lambda t: sum(1 for n in t if ctx[n] is None)
        )
        return evalBGP(ctx, triples)
    if kind == "Filter":
        return evalFilter(ctx, part)
    if kind == "Join":
        return evalJoin(ctx, part)
    if kind == "LeftJoin":
        return evalLeftJoin(ctx, part)
    if kind == "Graph":
        return evalGraph(ctx, part)
    if kind == "Union":
        return evalUnion(ctx, part)
    if kind == "ToMultiSet":
        return evalMultiset(ctx, part)
    if kind == "Extend":
        return evalExtend(ctx, part)
    if kind == "Minus":
        return evalMinus(ctx, part)

    if kind == "Project":
        return evalProject(ctx, part)
    if kind == "Slice":
        return evalSlice(ctx, part)
    if kind == "Distinct":
        return evalDistinct(ctx, part)
    if kind == "Reduced":
        return evalReduced(ctx, part)

    if kind == "OrderBy":
        return evalOrderBy(ctx, part)
    if kind == "Group":
        return evalGroup(ctx, part)
    if kind == "AggregateJoin":
        return evalAggregateJoin(ctx, part)

    if kind == "SelectQuery":
        return evalSelectQuery(ctx, part)
    if kind == "AskQuery":
        return evalAskQuery(ctx, part)
    if kind == "ConstructQuery":
        return evalConstructQuery(ctx, part)

    if kind == "ServiceGraphPattern":
        return evalServiceQuery(ctx, part)

    if kind == "DescribeQuery":
        return evalDescribeQuery(ctx, part)

    raise Exception("I dont know: %s" % part.name)
|
||||
|
||||
|
||||
def evalServiceQuery(ctx: QueryContext, part: CompValue):
    """Evaluate a SERVICE pattern by querying the remote endpoint.

    The endpoint URL and inner pattern are extracted from the node's
    ``service_string``; when it does not have the expected
    ``service <url> { ... }`` shape, nothing is yielded. The query is sent
    with GET when short and POST otherwise, and the JSON result bindings are
    converted into solutions.

    NOTE(review): the endpoint URL comes straight from the query text and is
    handed to ``urlopen`` unchecked - callers evaluating untrusted queries
    need to restrict network access themselves.

    :raises Exception: if the endpoint answers with a status other than 200.
    """
    match = re.match(
        "^service <(.*)>[ \n]*{(.*)}[ \n]*$",
        # type error: Argument 2 to "get" of "CompValue" has incompatible type "str"; expected "bool" [arg-type]
        part.get("service_string", ""),  # type: ignore[arg-type]
        re.DOTALL | re.I,
    )
    if not match:
        return

    service_url = match.group(1)
    service_query = _buildQueryStringForServiceCall(ctx, match.group(2))

    query_settings = {"query": service_query, "output": "json"}
    headers = {
        "accept": "application/sparql-results+json",
        "user-agent": "rdflibForAnUser",
    }
    # GET is easier to cache so prefer that if the query is not too long
    if len(service_query) < 600:
        request = Request(
            service_url + "?" + urlencode(query_settings), headers=headers
        )
    else:
        request = Request(
            service_url,
            data=urlencode(query_settings).encode(),
            headers=headers,
        )

    # read the body inside the ``with`` so the connection is always closed,
    # even when the consumer abandons this generator early
    with urlopen(request) as response:
        status = response.status
        payload = response.read() if status == 200 else None

    if status != 200:
        raise Exception(
            "Service: %s responded with code: %s" % (service_url, status)
        )

    if _HAS_ORJSON:
        json_dict = orjson.loads(payload)
    else:
        json_dict = json.loads(payload)
    variables = json_dict["head"]["vars"]
    for r in json_dict["results"]["bindings"]:
        # type error: Argument 2 to "_yieldBindingsFromServiceCallResult" has incompatible type "str"; expected "Dict[str, Dict[str, str]]"
        yield from _yieldBindingsFromServiceCallResult(ctx, r, variables)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def _buildQueryStringForServiceCall(ctx: QueryContext, service_query: str) -> str:
    """
    Build a query string to be used by the service call.
    It is supposed to pass in the existing bound solutions.
    Re-adds prefixes if added and sets the base.
    Wraps it in select if needed.

    :param ctx: the query context supplying the prologue and the current
        solution.
    :param service_query: the text found between the braces of the SERVICE
        pattern.
    :return: the query text to send to the remote endpoint.
    """
    try:
        parser.parseQuery(service_query)
    except ParseException:
        # NOTE(review): the nesting of this handler was reconstructed; the
        # prefix/base re-adding is assumed to belong to the wrap case only.
        # This could be because we don't have a select around the service call.
        service_query = "SELECT REDUCED * WHERE {" + service_query + "}"
        # type error: Item "None" of "Optional[Prologue]" has no attribute "namespace_manager"
        for p in ctx.prologue.namespace_manager.store.namespaces():  # type: ignore[union-attr]
            service_query = "PREFIX " + p[0] + ":" + p[1].n3() + " " + service_query
        # re add the base if one was defined
        # type error: Item "None" of "Optional[Prologue]" has no attribute "base"
        base = ctx.prologue.base  # type: ignore[union-attr]
        if base is not None and len(base) > 0:
            service_query = "BASE <" + base + "> " + service_query
    # hand the already bound variables to the endpoint as a VALUES clause
    sol = [v for v in ctx.solution() if isinstance(v, Variable)]
    if len(sol) > 0:
        variables = " ".join([v.n3() for v in sol])
        variables_bound = " ".join([ctx.get(v).n3() for v in sol])
        service_query = (
            service_query + "VALUES (" + variables + ") {(" + variables_bound + ")}"
        )
    return service_query
|
||||
|
||||
|
||||
def _yieldBindingsFromServiceCallResult(
    ctx: QueryContext, r: Dict[str, Dict[str, str]], variables: List[str]
) -> Generator[FrozenBindings, None, None]:
    """Convert one JSON result row of a service call into a solution.

    :param r: the row, mapping variable names to binding objects with at
        least ``type`` and ``value`` keys.
    :param variables: the variable names announced in the result head.
    :raises ValueError: if a binding has an unknown ``type``.
    """
    res_dict: Dict[Variable, Identifier] = {}
    for var in variables:
        var_binding = r.get(var)
        if not var_binding:
            # variable missing from this row, or bound to an empty object
            continue
        var_type = var_binding["type"]
        term: Identifier
        if var_type == "uri":
            term = URIRef(var_binding["value"])
        elif var_type == "literal":
            term = Literal(
                var_binding["value"],
                datatype=var_binding.get("datatype"),
                lang=var_binding.get("xml:lang"),
            )
        # This is here because of
        # https://www.w3.org/TR/2006/NOTE-rdf-sparql-json-res-20061004/#variable-binding-results
        elif var_type == "typed-literal":
            term = Literal(
                var_binding["value"], datatype=URIRef(var_binding["datatype"])
            )
        elif var_type == "bnode":
            term = BNode(var_binding["value"])
        else:
            raise ValueError(f"invalid type {var_type!r} for variable {var!r}")
        res_dict[Variable(var)] = term
    yield FrozenBindings(ctx, res_dict)
|
||||
|
||||
|
||||
def evalGroup(ctx: QueryContext, group: CompValue):
    """
    http://www.w3.org/TR/sparql11-query/#defn_algGroup

    Only evaluates the wrapped pattern; the grouping itself is performed by
    evalAggregateJoin.
    """
    # grouping should be implemented by evalAggregateJoin
    inner = group.p
    return evalPart(ctx, inner)
|
||||
|
||||
|
||||
def evalAggregateJoin(
    ctx: QueryContext, agg: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an AggregateJoin node: group rows and run the aggregates.

    Without a grouping expression all rows feed a single aggregator;
    otherwise rows are keyed by the tuple of evaluated group expressions.
    One solution is yielded per aggregator, or one empty solution when no
    aggregator was created.
    """
    rows = evalPart(ctx, agg.p)
    # p is always a Group, we always get a dict back

    group_expr = agg.p.expr
    # one Aggregator per group key, created on first access
    aggregators: Dict[Any, Any] = collections.defaultdict(
        lambda: Aggregator(aggregations=agg.A)
    )

    if group_expr is None:
        # no grouping, just COUNT in SELECT clause
        # get 1 aggregator for counting
        sole = aggregators[True]
        for row in rows:
            sole.update(row)
    else:
        for row in rows:
            # determine right group aggregator for row
            key = tuple(_eval(e, row, False) for e in group_expr)
            aggregators[key].update(row)

    # all rows are done; yield aggregated values
    for aggregator in aggregators.values():
        yield FrozenBindings(ctx, aggregator.get_bindings())

    # there were no matches
    if not aggregators:
        yield FrozenBindings(ctx)
|
||||
|
||||
|
||||
def evalOrderBy(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an OrderBy node.

    The order conditions are applied from last to first with a stable sort,
    so the first condition ends up being the most significant one.
    """
    solutions = evalPart(ctx, part.p)

    for condition in reversed(part.expr):
        descending = bool(condition.order and condition.order == "DESC")

        def sort_key(row, _condition=condition):
            return _val(value(row, _condition.expr, variables=True))

        solutions = sorted(solutions, key=sort_key, reverse=descending)

    return solutions
|
||||
|
||||
|
||||
def evalSlice(ctx: QueryContext, slice: CompValue):
    """Evaluate a Slice node: skip ``slice.start`` rows, keep ``slice.length``.

    A ``slice.length`` of ``None`` means "no upper bound".
    """
    rows = evalPart(ctx, slice.p)

    if slice.length is None:
        stop = None
    else:
        stop = slice.start + slice.length
    return itertools.islice(rows, slice.start, stop)
|
||||
|
||||
|
||||
def evalReduced(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """apply REDUCED to result

    REDUCED is not as strict as DISTINCT, but if the incoming rows were sorted
    it should produce the same result with limited extra memory and time per
    incoming row.
    """

    # This implementation uses a most recently used strategy and a limited
    # buffer size. It relates to a LRU caching algorithm:
    # https://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used_.28LRU.29
    MAX = 1
    # TODO: add configuration or determine "best" size for most use cases
    # 0: No reduction
    # 1: compare only with the last row, almost no reduction with
    #    unordered incoming rows
    # N: The greater the buffer size the greater the reduction but more
    #    memory and time are needed

    # mixed data structure: set for lookup, deque for append/pop/remove
    recent = set()
    recency: Deque[Any] = collections.deque()

    for row in evalPart(ctx, part.p):
        if row not in recent:
            # row seems to be new
            yield row
            recent.add(row)
            if len(recent) > MAX:
                # drop the least recently used row from buffer
                recent.remove(recency.pop())
        else:
            # forget last position of row
            recency.remove(row)
        # put row to the front
        recency.appendleft(row)
|
||||
|
||||
|
||||
def evalDistinct(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield each distinct solution of ``part.p`` once, in first-seen order."""
    seen = set()
    for solution in evalPart(ctx, part.p):
        if solution in seen:
            continue
        yield solution
        seen.add(solution)
|
||||
|
||||
|
||||
def evalProject(ctx: QueryContext, project: CompValue):
    """Lazily project each solution of ``project.p`` onto ``project.PV``."""
    solutions = evalPart(ctx, project.p)
    return (solution.project(project.PV) for solution in solutions)
|
||||
|
||||
|
||||
def evalSelectQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, List[Variable], Iterable[FrozenDict]]]:
    """Evaluate a SELECT query.

    :return: a mapping with ``type_`` (``"SELECT"``), ``bindings`` (the
        evaluated pattern) and ``vars_`` (the projected variables).
    """
    result: Dict[str, Union[str, List[Variable], Iterable[FrozenDict]]] = {
        "type_": "SELECT",
        "bindings": evalPart(ctx, query.p),
        "vars_": query.PV,
    }
    return result
|
||||
|
||||
|
||||
def evalAskQuery(ctx: QueryContext, query: CompValue) -> Mapping[str, Union[str, bool]]:
    """Evaluate an ASK query.

    :return: a mapping with ``type_`` (``"ASK"``) and ``askAnswer``, which is
        true as soon as the pattern produces a first solution.
    """
    result: Dict[str, Union[bool, str]] = {"type_": "ASK", "askAnswer": False}
    # any() stops after the first solution
    if any(True for _ in evalPart(ctx, query.p)):
        result["askAnswer"] = True
    return result
|
||||
|
||||
|
||||
def evalConstructQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, Graph]]:
    """Evaluate a CONSTRUCT query.

    :return: a mapping with ``type_`` (``"CONSTRUCT"``) and ``graph``, the
        graph built by filling the template once per solution.
    """
    # a construct-where query has no template of its own
    # query->project->bgp ...
    template = query.template or query.p.p.triples

    constructed = Graph()
    for solution in evalPart(ctx, query.p):
        constructed += _fillTemplate(template, solution)

    result: Dict[str, Union[str, Graph]] = {
        "type_": "CONSTRUCT",
        "graph": constructed,
    }
    return result
|
||||
|
||||
|
||||
def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]:
    """Evaluate a DESCRIBE query.

    :return: a mapping with ``type_`` (``"DESCRIBE"``) and ``graph``, which
        receives ``ctx.graph.cbd(...)`` of every resource to describe.
    """
    # Create a result graph and bind namespaces from the graph being queried
    described = Graph()
    # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces"
    for prefix, namespace in ctx.graph.namespaces():  # type: ignore[union-attr]
        described.bind(prefix, namespace)

    # Explicit IRIs may be provided to a DESCRIBE query.
    # If there is a WHERE clause, explicit IRIs may be provided in
    # addition to projected variables. Find those explicit IRIs and
    # prepare to describe them.
    to_describe = {term for term in query.PV if isinstance(term, URIRef)}

    # If there is a WHERE clause, evaluate it then find the unique set of
    # resources to describe across all bindings and projected variables
    if query.p is not None:
        for binding in evalPart(ctx, query.p):
            to_describe.update(binding.values())

    # Get a CBD for all resources identified to describe
    for resource in to_describe:
        # type error: Item "None" of "Optional[Graph]" has no attribute "cbd"
        ctx.graph.cbd(resource, target_graph=described)  # type: ignore[union-attr]

    result: Dict[str, Union[str, Graph]] = {
        "type_": "DESCRIBE",
        "graph": described,
    }
    return result
|
||||
|
||||
|
||||
def evalQuery(
    graph: Graph,
    query: Query,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    base: Optional[str] = None,
) -> Mapping[Any, Any]:
    """
    Entry point: set up a query context for *graph* and evaluate *query*.

    The keys of *initBindings* are converted to ``Variable``. The *base*
    argument is accepted but not used by this function.

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.
    """
    algebra = query.algebra

    bound = {Variable(name): term for name, term in (initBindings or {}).items()}

    ctx = QueryContext(
        graph, initBindings=bound, datasetClause=algebra.datasetClause
    )
    ctx.prologue = query.prologue

    return evalPart(ctx, algebra)
|
||||
@@ -0,0 +1,188 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
from typing import (
|
||||
Any,
|
||||
DefaultDict,
|
||||
Generator,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.operators import EBV
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, Expr
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
NotBoundError,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
_ContextType = Union[FrozenBindings, QueryContext]
|
||||
_FrozenDictT = TypeVar("_FrozenDictT", bound=FrozenDict)
|
||||
|
||||
|
||||
def _diff(
    a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT], expr
) -> Set[_FrozenDictT]:
    """
    Return the solutions of ``a`` that have no matching partner in ``b``.

    A solution ``x`` is dropped as soon as some ``y`` in ``b`` is compatible
    with it and ``expr`` has a true effective boolean value for the merged
    solution; all other solutions are collected into the returned set.

    ``b`` is iterated once per element of ``a``.
    """
    return {
        x
        for x in a
        if not any(x.compatible(y) and _ebv(expr, x.merge(y)) for y in b)
    }
|
||||
|
||||
|
||||
def _minus(
|
||||
a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT]
|
||||
) -> Generator[_FrozenDictT, None, None]:
|
||||
for x in a:
|
||||
if all((not x.compatible(y)) or x.disjointDomain(y) for y in b):
|
||||
yield x
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenBindings], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenBindings, None, None]: ...
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]: ...
|
||||
|
||||
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]:
|
||||
for x in a:
|
||||
for y in b:
|
||||
if x.compatible(y):
|
||||
yield x.merge(y)
|
||||
|
||||
|
||||
def _ebv(expr: Union[Literal, Variable, Expr], ctx: FrozenDict) -> bool:
    """
    Return the effective boolean value of ``expr`` as a plain ``bool``.

    ``expr`` is first passed to ``EBV`` directly. If that raises
    ``SPARQLError``, an :class:`Expr` is evaluated against ``ctx`` and a
    :class:`Variable` is looked up in ``ctx`` before ``EBV`` is applied again.

    Any error while evaluating or looking up counts as ``False``.

    :raises Exception: if ``expr`` is a ``CompValue`` that is not an ``Expr``.
    """

    try:
        return EBV(expr)
    except SPARQLError:
        # Not directly usable as a boolean; try the fallbacks below.
        pass
    if isinstance(expr, Expr):
        try:
            return EBV(expr.eval(ctx))
        except SPARQLError:
            return False  # filter error == False
    # type error: Subclass of "Literal" and "CompValue" cannot exist: would have incompatible method signatures
    elif isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - filter got a CompValue without evalfn! %r" % expr)
    elif isinstance(expr, Variable):
        try:
            return EBV(ctx[expr])
        except Exception:
            # Unbound variable or a value without a boolean value: treat as
            # False. ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            return False
    return False
|
||||
|
||||
|
||||
@overload
def _eval(
    expr: Union[Literal, URIRef],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Literal, URIRef]: ...


@overload
def _eval(
    expr: Union[Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Any, SPARQLError]: ...


def _eval(
    expr: Union[Literal, URIRef, Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = True,
) -> Any:
    """
    Evaluate ``expr`` in the bindings ``ctx``.

    * a ``Literal`` or ``URIRef`` is returned unchanged;
    * an ``Expr`` is evaluated with ``expr.eval(ctx)``;
    * a ``Variable`` is looked up in ``ctx``.

    :param raise_not_bound_error: when a variable is missing from ``ctx``,
        raise ``NotBoundError`` if true, otherwise return ``None``.
    :raises NotBoundError: for an unbound variable (see above).
    :raises Exception: for a ``CompValue`` that is not an ``Expr`` and for any
        other unsupported value.
    """
    if isinstance(expr, (Literal, URIRef)):
        return expr

    if isinstance(expr, Expr):
        return expr.eval(ctx)

    if isinstance(expr, Variable):
        try:
            return ctx[expr]
        except KeyError:
            if raise_not_bound_error:
                raise NotBoundError("Variable %s is not bound" % expr)
            return None

    if isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - _eval got a CompValue without evalfn! %r" % expr)

    raise Exception("Cannot eval thing: %s (%s)" % (expr, type(expr)))
|
||||
|
||||
|
||||
def _filter(
    a: Iterable[FrozenDict], expr: Union[Literal, Variable, Expr]
) -> Generator[FrozenDict, None, None]:
    """Yield the solutions of ``a`` for which ``_ebv(expr, solution)`` is true."""
    for solution in a:
        if not _ebv(expr, solution):
            continue
        yield solution
|
||||
|
||||
|
||||
def _fillTemplate(
    template: Iterable[Tuple[Identifier, Identifier, Identifier]],
    solution: _ContextType,
) -> Generator[Tuple[Identifier, Identifier, Identifier], None, None]:
    """
    For construct/deleteWhere and friends

    Instantiate each triple of ``template`` with the values that ``solution``
    holds for its terms. Blank nodes in the template are replaced by new blank
    nodes; the same template blank node maps to the same new blank node for
    the duration of one call. Triples with any term left as ``None`` are
    skipped.
    """
    # One new BNode per distinct template BNode, created on first use.
    fresh_bnodes: DefaultDict[BNode, BNode] = collections.defaultdict(BNode)

    for triple in template:
        s, p, o = triple
        looked_up = (solution.get(s), solution.get(p), solution.get(o))

        # instantiate new bnodes for each solution
        filled = tuple(
            fresh_bnodes[term] if isinstance(term, BNode) else bound
            for term, bound in zip(triple, looked_up)
        )

        if all(term is not None for term in filled):
            yield filled
|
||||
|
||||
|
||||
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
def _val(v: _ValueT) -> Tuple[int, _ValueT]:
    """
    Utility for ordering things.

    Pairs ``v`` with a rank determined by its type: ``Variable`` is 0,
    ``BNode`` is 1, ``URIRef`` is 2 and ``Literal`` is 3.
    """
    if isinstance(v, Variable):
        return (0, v)
    if isinstance(v, BNode):
        return (1, v)
    if isinstance(v, URIRef):
        return (2, v)
    if isinstance(v, Literal):
        return (3, v)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
|
||||
NOTE: PyParsing setResultName/__call__ provides a very similar solution to this
|
||||
I didn't realise at the time of writing and I will remove a
|
||||
lot of this code at some point
|
||||
|
||||
Utility classes for creating an abstract-syntax tree with pyparsing actions
|
||||
|
||||
Lets you label and group parts of parser production rules
|
||||
|
||||
For example:
|
||||
|
||||
# [5] BaseDecl ::= 'BASE' IRIREF
|
||||
BaseDecl = Comp('Base', Keyword('BASE') + Param('iri',IRIREF))
|
||||
|
||||
After parsing, this gives you back a CompValue object,
|
||||
which is a dict/object with the parameters specified.
|
||||
So you can access the parameters as attributes or as keys:
|
||||
|
||||
baseDecl.iri
|
||||
|
||||
Comp lets you set an evalFn that is bound to the eval method of
|
||||
the resulting CompValue
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import OrderedDict
|
||||
from types import MethodType
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pyparsing import ParserElement, ParseResults, TokenConverter, originalTextFor
|
||||
|
||||
from rdflib.term import BNode, Identifier, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings
|
||||
|
||||
|
||||
# This is an alternative
|
||||
|
||||
# Comp('Sum')( Param('x')(Number) + '+' + Param('y')(Number) )
|
||||
|
||||
|
||||
def value(
    ctx: FrozenBindings,
    val: Any,
    variables: bool = False,
    errors: bool = False,
) -> Any:
    """
    Utility function for evaluating ``val`` in the context ``ctx``.

    * an ``Expr`` is evaluated with ``val.eval(ctx)``;
    * a list is evaluated element by element;
    * a ``BNode`` or ``Variable`` is looked up in ``ctx``;
    * a ``ParseResults`` holding exactly one item is unwrapped and evaluated;
    * anything else is returned unchanged.

    :param variables: normally an unbound variable raises ``NotBoundError``;
        set to ``True`` to get the unbound variable back instead.
    :param errors: normally a ``SPARQLError`` found in ``ctx`` is raised;
        set to ``True`` to get the error back instead.
    :raises Exception: if ``val`` is a ``CompValue`` that is not an ``Expr``.
    """
    if isinstance(val, Expr):
        return val.eval(ctx)  # recurse?

    if isinstance(val, CompValue):
        raise Exception("What do I do with this CompValue? %s" % val)

    if isinstance(val, list):
        return [value(ctx, item, variables, errors) for item in val]

    if isinstance(val, (BNode, Variable)):
        bound = ctx.get(val)
        if isinstance(bound, SPARQLError) and not errors:
            raise bound
        if bound is not None:
            return bound

        # not bound
        if not variables:
            raise NotBoundError
        return val

    if isinstance(val, ParseResults) and len(val) == 1:
        return value(ctx, val[0], variables, errors)

    return val
|
||||
|
||||
|
||||
class ParamValue:
    """
    The result of parsing a Param.

    Only stores the name, the parsed tokens and the ``isList`` flag; all
    cleverness is in the CompValue.
    """

    def __init__(
        self, name: str, tokenList: Union[List[Any], ParseResults], isList: bool
    ):
        self.isList = isList
        self.name = name

        # A one-element list / ParseResults is stored as its single element.
        is_singleton = (
            isinstance(tokenList, (list, ParseResults)) and len(tokenList) == 1
        )
        self.tokenList = tokenList[0] if is_singleton else tokenList

    def __str__(self) -> str:
        return "Param(%s, %s)" % (self.name, self.tokenList)
|
||||
|
||||
|
||||
class Param(TokenConverter):
    """
    A pyparsing token for labelling a part of the parse-tree.

    If ``isList`` is true, repeated occurrences of the parameter have their
    values merged into a list.
    """

    def __init__(self, name: str, expr, isList: bool = False):
        self.isList = isList
        super().__init__(expr)
        self.setName(name)
        # Wrap whatever ``expr`` matched in a ParamValue carrying this name.
        self.addParseAction(self.postParse2)

    def postParse2(self, tokenList: Union[List[Any], ParseResults]) -> ParamValue:
        """Parse action: label the matched tokens with this Param's name."""
        return ParamValue(self.name, tokenList, self.isList)
|
||||
|
||||
|
||||
class ParamList(Param):
    """
    Shorthand for ``Param(name, expr, isList=True)``.
    """

    def __init__(self, name: str, expr):
        super().__init__(name, expr, isList=True)
|
||||
|
||||
|
||||
_ValT = TypeVar("_ValT")
|
||||
|
||||
|
||||
class CompValue(OrderedDict):
    """
    The result of parsing a Comp.

    Any included Params are available as dict keys or as attributes.

    While an evaluation context is attached as ``self.ctx``, values read
    through ``[]``, ``get`` or attribute access are first passed through
    :func:`value` with that context; without one they are returned as stored.
    """

    def __init__(self, name: str, **values):
        OrderedDict.__init__(self)
        self.name = name
        self.update(values)

    def clone(self) -> CompValue:
        """Return a new ``CompValue`` with the same name and items."""
        return CompValue(self.name, **self)

    def __str__(self) -> str:
        return self.name + "_" + OrderedDict.__str__(self)

    def __repr__(self) -> str:
        return self.name + "_" + dict.__repr__(self)

    def _value(
        self, val: _ValT, variables: bool = False, errors: bool = False
    ) -> Union[_ValT, Any]:
        """
        Evaluate ``val`` against the attached context, if there is one.

        ``variables`` and ``errors`` are forwarded to :func:`value`.
        """
        # ``self.ctx`` resolves to None through __getattr__ when no context
        # has been attached.
        if self.ctx is not None:
            # ``errors`` used to be dropped here, so ``get(..., errors=True)``
            # had no effect.
            return value(self.ctx, val, variables, errors)
        else:
            return val

    def __getitem__(self, a):
        return self._value(OrderedDict.__getitem__(self, a))

    # type error: Signature of "get" incompatible with supertype "dict"
    # type error: Signature of "get" incompatible with supertype "Mapping" [override]
    def get(self, a, variables: bool = False, errors: bool = False):  # type: ignore[override]
        """
        Return the (evaluated) value stored under ``a``.

        When ``a`` is not a key, ``a`` itself is used as the value.
        """
        return self._value(OrderedDict.get(self, a, a), variables, errors)

    def __getattr__(self, a: str) -> Any:
        # Hack hack: OrderedDict relies on this
        if a in ("_OrderedDict__root", "_OrderedDict__end"):
            raise AttributeError()
        try:
            return self[a]
        except KeyError:
            # Unknown parameters read as None rather than raising
            # AttributeError.
            return None

    if TYPE_CHECKING:
        # this is here because properties are dynamically set on CompValue
        def __setattr__(self, __name: str, __value: Any) -> None: ...
|
||||
|
||||
|
||||
class Expr(CompValue):
    """
    A CompValue that can be evaluated.
    """

    def __init__(
        self,
        name: str,
        evalfn: Optional[Callable[[Any, Any], Any]] = None,
        **values,
    ):
        super().__init__(name, **values)

        # Bind the evaluation function to this instance so that it is called
        # as ``evalfn(self, ctx)``.
        self._evalfn = MethodType(evalfn, self) if evalfn else None

    def eval(self, ctx: Any = {}) -> Union[SPARQLError, Any]:
        """
        Evaluate this expression in ``ctx``.

        ``ctx`` is attached as ``self.ctx`` for the duration of the call and
        reset to ``None`` afterwards. A ``SPARQLError`` raised by the
        evaluation function is returned rather than raised.
        """
        self.ctx: Optional[Union[Mapping, FrozenBindings]] = ctx
        try:
            # type error: "None" not callable
            return self._evalfn(ctx)  # type: ignore[misc]
        except SPARQLError as error:
            return error
        finally:
            self.ctx = None
|
||||
|
||||
|
||||
class Comp(TokenConverter):
    """
    A pyparsing token for grouping together things with a label.

    Any sub-tokens that are not Params will be ignored.

    Returns CompValue / Expr objects - depending on whether evalFn is set.
    """

    def __init__(self, name: str, expr: ParserElement):
        self.expr = expr
        TokenConverter.__init__(self, expr)
        self.setName(name)
        self.evalfn: Optional[Callable[[Any, Any], Any]] = None

    def postParse(
        self, instring: str, loc: int, tokenList: ParseResults
    ) -> Union[Expr, CompValue]:
        """Collect the ``ParamValue`` tokens of a match into one result object."""
        res: Union[Expr, CompValue]
        if self.evalfn:
            res = Expr(self.name)
            res._evalfn = MethodType(self.evalfn, res)
        else:
            res = CompValue(self.name)
            if self.name == "ServiceGraphPattern":
                # Then this must be a service graph pattern and have
                # already matched.
                # lets assume there is one, for now, then test for two later.
                original_text = originalTextFor(self.expr)
                res["service_string"] = original_text.searchString(instring)[0][0]

        for token in tokenList:
            if not isinstance(token, ParamValue):
                continue
            if not token.isList:
                res[token.name] = token.tokenList
                continue
            # List parameters accumulate one entry per occurrence.
            if token.name not in res:
                res[token.name] = []
            res[token.name].append(token.tokenList)
        return res

    def setEvalFn(self, evalfn: Callable[[Any, Any], Any]) -> Comp:
        """Set the evaluation function for results and return ``self``."""
        self.evalfn = evalfn
        return self
|
||||
|
||||
|
||||
def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str:
    """
    Render a parse result as indented text.

    The list items of ``t`` are rendered first, followed by its named items
    in sorted order; nested values are rendered one level deeper.
    """
    parts: List[str] = [
        _prettify_sub_parsetree(item, indent, depth + 1) for item in t.asList()
    ]
    padding = " " * depth
    for key, val in sorted(t.items()):
        parts.append("%s%s- %s:\n" % (indent, padding, key))
        parts.append(_prettify_sub_parsetree(val, indent, depth + 1))
    return "".join(parts)
|
||||
|
||||
|
||||
def _prettify_sub_parsetree(
    t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None],
    indent: str = "",
    depth: int = 0,
) -> str:
    """
    Render one node of a parse tree as indented text.

    A ``CompValue`` gets a ``> name:`` header followed by its items; a plain
    dict is rendered as its items only; a list is rendered element by
    element; anything else is rendered with ``repr``.
    """
    lines: List[str] = []
    if isinstance(t, dict):
        # CompValue is a dict subclass: same item layout, plus a header line.
        if isinstance(t, CompValue):
            lines.append("%s%s> %s:\n" % (indent, " " * depth, t.name))
        item_padding = " " * (depth + 1)
        for key, val in t.items():
            lines.append("%s%s- %s:\n" % (indent, item_padding, key))
            lines.append(_prettify_sub_parsetree(val, indent, depth + 2))
    elif isinstance(t, list):
        lines.extend(_prettify_sub_parsetree(item, indent, depth + 1) for item in t)
    else:
        lines.append("%s%s- %r\n" % (indent, " " * depth, t))
    return "".join(lines)
|
||||
|
||||
|
||||
# hurrah for circular imports
|
||||
from rdflib.plugins.sparql.sparql import NotBoundError, SPARQLError # noqa: E402
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Code for tying SPARQL Engine into RDFLib
|
||||
|
||||
These should be automatically registered with RDFLib
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Mapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate
|
||||
from rdflib.plugins.sparql.evaluate import evalQuery
|
||||
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
|
||||
from rdflib.plugins.sparql.sparql import Query, Update
|
||||
from rdflib.plugins.sparql.update import evalUpdate
|
||||
from rdflib.query import Processor, Result, UpdateProcessor
|
||||
from rdflib.term import Identifier
|
||||
|
||||
|
||||
def prepareQuery(
    queryString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Query:
    """
    Parse and translate a SPARQL Query.

    :param queryString: the query text.
    :param initNs: optional namespace mapping; an empty dict is used when
        omitted.
    :param base: optional base passed on to the translation.
    :return: the translated query, with the arguments it was built from
        recorded in ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    query = translateQuery(parseQuery(queryString), base, namespaces)
    query._original_args = (queryString, namespaces, base)
    return query
|
||||
|
||||
|
||||
def prepareUpdate(
    updateString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Update:
    """
    Parse and translate a SPARQL Update.

    :param updateString: the update request text.
    :param initNs: optional namespace mapping; an empty dict is used when
        omitted.
    :param base: optional base passed on to the translation.
    :return: the translated update, with the arguments it was built from
        recorded in ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    update = translateUpdate(parseUpdate(updateString), base, namespaces)
    update._original_args = (updateString, namespaces, base)
    return update
|
||||
|
||||
|
||||
def processUpdate(
    graph: Graph,
    updateString: str,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> None:
    """
    Process a SPARQL Update Request.

    The request text is parsed, translated with ``base`` and ``initNs`` and
    evaluated against ``graph`` with ``initBindings``.

    Returns nothing on success or raises exceptions on error.
    """
    update = translateUpdate(parseUpdate(updateString), base, initNs)
    evalUpdate(graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLResult(Result):
    """
    A :class:`Result` populated from the mapping produced by query evaluation.

    ``res["type_"]`` is required; ``vars_``, ``bindings``, ``askAnswer`` and
    ``graph`` are optional and default to ``None``.
    """

    def __init__(self, res: Mapping[str, Any]):
        super().__init__(res["type_"])
        # (attribute on the result, key in ``res``), assigned in this order.
        for attribute, key in (
            ("vars", "vars_"),
            ("bindings", "bindings"),
            ("askAnswer", "askAnswer"),
            ("graph", "graph"),
        ):
            setattr(self, attribute, res.get(key))
|
||||
|
||||
|
||||
class SPARQLUpdateProcessor(UpdateProcessor):
    """Update processor that evaluates SPARQL Update requests on a graph."""

    def __init__(self, graph):
        self.graph = graph

    def update(
        self,
        strOrQuery: Union[str, Update],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
    ) -> None:
        """
        Evaluate an update request against the processor's graph.

        A string is parsed and translated first (using ``initNs``); an
        already-translated ``Update`` is evaluated as given.

        .. caution::

            This method can access indirectly requested network endpoints, for
            example, query processing will attempt to access network endpoints
            specified in ``SERVICE`` directives.

            When processing untrusted or potentially malicious queries, measures
            should be taken to restrict network and file access.

            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
        """
        update = (
            translateUpdate(parseUpdate(strOrQuery), initNs=initNs)
            if isinstance(strOrQuery, str)
            else strOrQuery
        )
        return evalUpdate(self.graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLProcessor(Processor):
    """Query processor that evaluates SPARQL queries on a graph."""

    def __init__(self, graph):
        self.graph = graph

    # NOTE on type error: this is because the super type constructor does not
    # accept base argument and the position of the DEBUG argument is
    # different.
    # type error: Signature of "query" incompatible with supertype "Processor"
    def query(  # type: ignore[override]
        self,
        strOrQuery: Union[str, Query],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
        base: Optional[str] = None,
        DEBUG: bool = False,
    ) -> Mapping[str, Any]:
        """
        Evaluate a query with the given initial bindings, and initial
        namespaces. The given base is used to resolve relative URIs in
        the query and will be overridden by any BASE given in the query.

        A string is parsed and translated first; an already-translated
        ``Query`` is evaluated as given. ``DEBUG`` is not referenced here.

        .. caution::

            This method can access indirectly requested network endpoints, for
            example, query processing will attempt to access network endpoints
            specified in ``SERVICE`` directives.

            When processing untrusted or potentially malicious queries, measures
            should be taken to restrict network and file access.

            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
        """
        query = (
            translateQuery(parseQuery(strOrQuery), base, initNs)
            if isinstance(strOrQuery, str)
            else strOrQuery
        )
        return evalQuery(self.graph, query, initBindings, base)
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
Parsers and serializers for SPARQL Result formats
|
||||
"""
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
|
||||
This module implements a parser and serializer for the CSV SPARQL result
|
||||
formats
|
||||
|
||||
http://www.w3.org/TR/sparql11-results-csv-tsv/
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import csv
|
||||
from io import BufferedIOBase, TextIOBase
|
||||
from typing import IO, Dict, List, Optional, Union, cast
|
||||
|
||||
from rdflib.plugins.sparql.processor import SPARQLResult
|
||||
from rdflib.query import Result, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class CSVResultParser(ResultParser):
    """Parser for SELECT results in the SPARQL CSV result format."""

    def __init__(self):
        self.delim = ","

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """
        Read a CSV result document from ``source``.

        The first row supplies the variable names; every following row
        becomes one binding. ``content_type`` is not referenced here.
        """
        result = Result("SELECT")

        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]")
            source = codecs.getreader("utf-8")(source)  # type: ignore[assignment]

        rows = csv.reader(source, delimiter=self.delim)
        header = [Variable(name) for name in next(rows)]
        result.vars = header
        result.bindings = [self.parseRow(row, header) for row in rows]

        return result

    def parseRow(
        self, row: List[str], v: List[Variable]
    ) -> Dict[Variable, Union[BNode, URIRef, Literal]]:
        """Pair the variables ``v`` with the converted cells of ``row``, skipping empty cells."""
        terms = [self.convertTerm(cell) for cell in row]
        return {var: term for var, term in zip(v, terms) if term is not None}

    def convertTerm(self, t: str) -> Optional[Union[BNode, URIRef, Literal]]:
        """
        Convert one CSV cell to a term.

        An empty cell gives ``None``, a ``_:`` prefix a ``BNode``, an
        ``http://`` or ``https://`` prefix a ``URIRef``, and anything else a
        ``Literal``.
        """
        if t == "":
            return None
        if t.startswith("_:"):
            return BNode(t)  # or generate new IDs?
        if t.startswith(("http://", "https://")):  # TODO: more?
            return URIRef(t)
        return Literal(t)
|
||||
|
||||
|
||||
class CSVResultSerializer(ResultSerializer):
    """Serializer for SELECT results in the SPARQL CSV result format."""

    def __init__(self, result: SPARQLResult):
        ResultSerializer.__init__(self, result)

        self.delim = ","
        if result.type != "SELECT":
            raise Exception("CSVSerializer can only serialize select query results")

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs) -> None:
        """
        Write the result to ``stream`` as CSV.

        A text stream is written to directly; any other stream is wrapped in
        a writer that encodes to ``encoding``. The header row holds the
        variables and each binding becomes one row, with unbound variables
        written as empty cells.
        """
        # the serialiser writes bytes in the given encoding
        # in py3 csv.writer is unicode aware and writes STRINGS,
        # so we encode afterward

        # TODO: Find a better solution for all this casting
        writable_stream = cast(Union[TextIOBase, BufferedIOBase], stream)
        string_stream: TextIOBase
        if isinstance(writable_stream, TextIOBase):
            string_stream = writable_stream
        else:
            byte_stream = cast(BufferedIOBase, writable_stream)
            string_stream = cast(TextIOBase, codecs.getwriter(encoding)(byte_stream))

        writer = csv.writer(string_stream, delimiter=self.delim)

        header = [self.serializeTerm(var, encoding) for var in self.result.vars]  # type: ignore[union-attr]
        writer.writerow(header)
        for binding in self.result.bindings:
            cells = [
                self.serializeTerm(binding.get(var), encoding)
                for var in self.result.vars  # type: ignore[union-attr]
            ]
            writer.writerow(cells)

    def serializeTerm(
        self, term: Optional[Identifier], encoding: str
    ) -> Union[str, Identifier]:
        """
        Return the CSV cell for ``term``.

        ``None`` becomes an empty string, a ``BNode`` is prefixed with
        ``_:`` and any other term is returned as is. ``encoding`` is not
        referenced here.
        """
        if term is None:
            return ""
        if isinstance(term, BNode):
            return f"_:{term}"
        return term
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Optional
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.query import Result, ResultParser
|
||||
|
||||
|
||||
class GraphResultParser(ResultParser):
    """Result parser that reads ``source`` into a graph-valued result."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str]) -> Result:  # type: ignore[override]
        """
        Parse ``source`` into a new graph and return it as a CONSTRUCT result.

        ``content_type`` is passed to ``Graph.parse`` as the ``format``.
        """
        res = Result("CONSTRUCT")  # hmm - or describe?type_)
        graph = Graph()
        res.graph = graph
        graph.parse(source, format=content_type)

        return res
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
"""A Serializer for SPARQL results in JSON:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-json-res/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import IO, Any, Dict, Mapping, MutableSequence, Optional
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
|
||||
class JSONResultParser(ResultParser):
    """Parser for SPARQL results in JSON."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """
        Read all of ``source`` and build a :class:`JSONResult` from it.

        With orjson available the data is decoded by orjson and any failure
        is re-raised as ``ResultException``; otherwise bytes are decoded as
        UTF-8 and parsed with the standard ``json`` module.
        ``content_type`` is not referenced here.
        """
        raw = source.read()
        if not _HAS_ORJSON:
            text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
            return JSONResult(json.loads(text))

        try:
            loaded = orjson.loads(raw)
        except Exception as e:
            raise ResultException(f"Failed to parse result: {e}")
        return JSONResult(loaded)
|
||||
|
||||
|
||||
class JSONResultSerializer(ResultSerializer):
    """
    Serializer for SPARQL results in JSON.

    ASK results are written as ``{"head": {}, "boolean": ...}``; every other
    result type is written as a SELECT result with ``head.vars`` and
    ``results.bindings``.
    """

    def __init__(self, result: Result):
        ResultSerializer.__init__(self, result)

    # type error: Signature of "serialize" incompatible with supertype "ResultSerializer"
    def serialize(self, stream: IO, encoding: Optional[str] = None) -> None:  # type: ignore[override]
        """
        Write the result to ``stream`` as JSON.

        :param stream: the stream to write to.
        :param encoding: when given, encoded bytes are written first and the
            text form is written if the stream rejects them with
            ``TypeError`` or ``ValueError``; when ``None``, text is written.
        :raises ResultException: if orjson is in use and fails to serialize.
        """
        res: Dict[str, Any] = {}
        if self.result.type == "ASK":
            res["head"] = {}
            res["boolean"] = self.result.askAnswer
        else:
            # select
            res["results"] = {}
            res["head"] = {}
            res["head"]["vars"] = self.result.vars
            res["results"]["bindings"] = [
                self._bindingToJSON(x) for x in self.result.bindings
            ]
        if _HAS_ORJSON:
            try:
                r_bytes = orjson.dumps(res, option=orjson.OPT_NON_STR_KEYS)
            except Exception as e:
                raise ResultException(f"Failed to serialize result: {e}")
            if encoding is not None:
                # Note, orjson will always write utf-8 even if
                # encoding is specified as something else.
                try:
                    stream.write(r_bytes)
                except (TypeError, ValueError):
                    stream.write(r_bytes.decode("utf-8"))
            else:
                stream.write(r_bytes.decode("utf-8"))
        else:
            r_str = json.dumps(res, allow_nan=False, ensure_ascii=False)
            if encoding is not None:
                try:
                    stream.write(r_str.encode(encoding))
                except (TypeError, ValueError):
                    stream.write(r_str)
            else:
                stream.write(r_str)

    def _bindingToJSON(self, b: Mapping[Variable, Identifier]) -> Dict[Variable, Any]:
        """Convert one binding to its JSON form, leaving out terms that convert to ``None``."""
        res = {}
        for var in b:
            j = termToJSON(self, b[var])
            if j is not None:
                # Reuse the converted term instead of converting it twice.
                res[var] = j
        return res
|
||||
|
||||
|
||||
class JSONResult(Result):
    """
    A :class:`Result` built from a decoded SPARQL JSON results document.

    A document with a ``boolean`` key is an ASK result, one with a
    ``results`` key is a SELECT result.
    """

    def __init__(self, json: Dict[str, Any]):
        self.json = json
        if "boolean" in json:
            type_ = "ASK"
        elif "results" in json:
            type_ = "SELECT"
        else:
            raise ResultException("No boolean or results in json!")

        Result.__init__(self, type_)

        if type_ != "ASK":
            self.bindings = self._get_bindings()
            self.vars = [Variable(name) for name in json["head"]["vars"]]
            return
        self.askAnswer = bool(json["boolean"])

    def _get_bindings(self) -> MutableSequence[Mapping[Variable, Identifier]]:
        """Convert ``results.bindings`` of the document into rdflib terms keyed by ``Variable``."""
        rows: MutableSequence[Mapping[Variable, Identifier]] = [
            {Variable(name): parseJsonTerm(term) for name, term in row.items()}
            for row in self.json["results"]["bindings"]
        ]
        return rows
|
||||
|
||||
|
||||
def parseJsonTerm(d: Dict[str, str]) -> Identifier:
    """Return the rdflib term (Literal, URIRef, BNode) for a json-format dict.

    input is like:
    { 'type': 'uri', 'value': 'http://famegame.com/2006/01/username' }
    { 'type': 'literal', 'value': 'drewp' }

    :raises NotImplementedError: for a ``type`` other than ``uri``,
        ``literal``, ``typed-literal`` or ``bnode``.
    """
    kind = d["type"]
    if kind == "uri":
        return URIRef(d["value"])
    if kind == "literal":
        return Literal(d["value"], datatype=d.get("datatype"), lang=d.get("xml:lang"))
    if kind == "typed-literal":
        return Literal(d["value"], datatype=URIRef(d["datatype"]))
    if kind == "bnode":
        return BNode(d["value"])
    raise NotImplementedError("json term type %r" % kind)
|
||||
|
||||
|
||||
def termToJSON(
    self: JSONResultSerializer, term: Optional[Identifier]
) -> Optional[Dict[str, str]]:
    """
    Return the SPARQL JSON representation of ``term``.

    ``None`` stays ``None``. A literal carries ``datatype`` and ``xml:lang``
    entries only when the literal has them.

    :raises ResultException: for a term that is not a URIRef, Literal, BNode
        or ``None``.
    """
    if isinstance(term, URIRef):
        return {"type": "uri", "value": str(term)}

    if isinstance(term, Literal):
        encoded = {"type": "literal", "value": str(term)}
        datatype, language = term.datatype, term.language
        if datatype is not None:
            encoded["datatype"] = str(datatype)
        if language is not None:
            encoded["xml:lang"] = language
        return encoded

    if isinstance(term, BNode):
        return {"type": "bnode", "value": str(term)}

    if term is None:
        return None

    raise ResultException("Unknown term type: %s (%s)" % (term, type(term)))
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, MutableMapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, Namespace
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import Node, Variable
|
||||
|
||||
RS = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/result-set#")
|
||||
|
||||
|
||||
class RDFResultParser(ResultParser):
    """Result parser that delegates to :class:`RDFResult`."""

    def parse(self, source: Union[IO, Graph], **kwargs: Any) -> Result:
        """Build an ``RDFResult`` from ``source``, passing ``kwargs`` through unchanged."""
        return RDFResult(source, **kwargs)
|
||||
|
||||
|
||||
class RDFResult(Result):
    """
    A :class:`Result` read from a graph.

    If the graph contains a node typed ``RS.ResultSet``, the result is ASK
    when that node has an ``RS.boolean`` value and SELECT otherwise. Without
    such a node the whole graph is taken as a CONSTRUCT result.
    """

    def __init__(self, source: Union[IO, Graph], **kwargs: Any):
        if isinstance(source, Graph):
            graph = source
        else:
            # Anything that is not already a Graph is parsed into a new one.
            graph = Graph()
            graph.parse(source, **kwargs)

        rs = graph.value(predicate=RDF.type, object=RS.ResultSet)
        # there better be only one :)

        if rs is None:
            type_ = "CONSTRUCT"

            # use a new graph
            g = Graph()
            g += graph
        else:
            askAnswer = graph.value(rs, RS.boolean)
            type_ = "ASK" if askAnswer is not None else "SELECT"

        Result.__init__(self, type_)

        if type_ == "SELECT":
            # type error: Argument 1 to "Variable" has incompatible type "Node"; expected "str"
            self.vars = [Variable(v) for v in graph.objects(rs, RS.resultVariable)]  # type: ignore[arg-type]

            self.bindings = []

            for solution_node in graph.objects(rs, RS.solution):
                sol: MutableMapping[Variable, Optional[Node]] = {}
                for binding_node in graph.objects(solution_node, RS.binding):
                    bound_value = graph.value(binding_node, RS.value)
                    # type error: Argument 1 to "Variable" has incompatible type "Optional[Node]"; expected "str"
                    var = Variable(graph.value(binding_node, RS.variable))  # type: ignore[arg-type]
                    sol[var] = bound_value
                # error: Argument 1 to "append" of "list" has incompatible type "MutableMapping[Variable, Optional[Node]]"; expected "Mapping[Variable, Identifier]"
                self.bindings.append(sol)  # type: ignore[arg-type]
        elif type_ == "ASK":
            # type error: Item "Node" of "Optional[Node]" has no attribute "value"
            # type error: Item "None" of "Optional[Node]" has no attribute "value"
            self.askAnswer = askAnswer.value  # type: ignore[union-attr]
            # type error: Item "Node" of "Optional[Node]" has no attribute "value"
            # type error: Item "None" of "Optional[Node]" has no attribute "value"
            if askAnswer.value is None:  # type: ignore[union-attr]
                raise Exception("Malformed boolean in ask answer!")
        elif type_ == "CONSTRUCT":
            self.graph = g
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
This implements the Tab Separated SPARQL Result Format
|
||||
|
||||
It is implemented with pyparsing, reusing the elements from the SPARQL Parser
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import typing
|
||||
from typing import IO, Union
|
||||
|
||||
from pyparsing import (
|
||||
FollowedBy,
|
||||
LineEnd,
|
||||
Literal,
|
||||
Optional,
|
||||
ParserElement,
|
||||
Suppress,
|
||||
ZeroOrMore,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.parser import (
|
||||
BLANK_NODE_LABEL,
|
||||
IRIREF,
|
||||
LANGTAG,
|
||||
STRING_LITERAL1,
|
||||
STRING_LITERAL2,
|
||||
BooleanLiteral,
|
||||
NumericLiteral,
|
||||
Var,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import Comp, CompValue, Param
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import BNode, URIRef
|
||||
from rdflib.term import Literal as RDFLiteral
|
||||
|
||||
# Only spaces and newlines count as skippable whitespace for this grammar.
# Tabs are left out because the ROW and HEADER rules below use "\t" as the
# column separator.
# NOTE(review): this is set on the ParserElement class itself - presumably it
# also affects parser elements created after this module is imported; confirm.
ParserElement.setDefaultWhitespaceChars(" \n")

# A string is either of the two quoted string-literal forms imported from the
# SPARQL parser.
String = STRING_LITERAL1 | STRING_LITERAL2

# A literal: a string, optionally followed by a language tag, or by "^^" and
# a datatype IRI. The parts are exposed as "string", "lang" and "datatype".
RDFLITERAL = Comp(
    "literal",
    Param("string", String)
    + Optional(
        Param("lang", LANGTAG.leaveWhitespace())
        | Literal("^^").leaveWhitespace() + Param("datatype", IRIREF).leaveWhitespace()
    ),
)

# Unique sentinel produced for an empty cell; TSVResultParser.convertTerm
# turns it into None.
NONE_VALUE = object()

# An empty cell is recognised by look-ahead: the next thing is either the end
# of the line or another tab. Its parse action yields the sentinel above.
EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

# Any non-empty cell value.
TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

# A data row: one cell, then any number of tab-separated further cells.
ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
# Keep tab characters in the input instead of having pyparsing expand them.
ROW.parseWithTabs()

# The header row: tab-separated variables.
HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
|
||||
|
||||
|
||||
class TSVResultParser(ResultParser):
    """Result parser for the tab-separated SPARQL result format."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"  [override]
    def parse(self, source: IO, content_type: typing.Optional[str] = None) -> Result:  # type: ignore[override]
        """
        Read a TSV document from ``source`` and return a ``SELECT`` result.

        The first line is parsed as the header (the variables); each further
        non-empty line becomes one binding dict keyed by those variables.
        ``content_type`` is accepted but not used.
        """
        if isinstance(source.read(0), bytes):
            # A zero-length read tells us the stream yields bytes, so wrap it
            # in a utf-8 decoding reader.
            # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]")
            source = codecs.getreader("utf-8")(source)  # type: ignore[assignment]

        result = Result("SELECT")

        header_line = source.readline()
        result.vars = list(HEADER.parseString(header_line.strip(), parseAll=True))
        result.bindings = []

        raw_line = source.readline()
        while raw_line:
            content = raw_line.strip("\n")
            if content != "":
                cells = ROW.parseString(content, parseAll=True)
                # type error: Generator has incompatible item type "object"; expected "Identifier"
                result.bindings.append(dict(zip(result.vars, map(self.convertTerm, cells))))  # type: ignore[misc]
            raw_line = source.readline()

        return result

    def convertTerm(
        self, t: Union[object, RDFLiteral, BNode, CompValue, URIRef]
    ) -> typing.Optional[Union[object, BNode, URIRef, RDFLiteral]]:
        """
        Convert one parsed cell into the value stored in a binding.

        The empty-cell sentinel becomes ``None``, a "literal" ``CompValue``
        becomes an rdflib literal, and anything that is not a ``CompValue``
        is returned unchanged. Any other ``CompValue`` raises ``Exception``.
        """
        if t is NONE_VALUE:
            return None
        if not isinstance(t, CompValue):
            return t
        if t.name != "literal":
            raise Exception("I dont know how to handle this: %s" % (t,))
        return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from typing import IO, List, Optional, Union
|
||||
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.query import ResultSerializer
|
||||
from rdflib.term import BNode, Literal, URIRef, Variable
|
||||
|
||||
|
||||
def _termString(
|
||||
t: Optional[Union[URIRef, Literal, BNode]],
|
||||
namespace_manager: Optional[NamespaceManager],
|
||||
) -> str:
|
||||
if t is None:
|
||||
return "-"
|
||||
if namespace_manager:
|
||||
if isinstance(t, URIRef):
|
||||
return namespace_manager.normalizeUri(t)
|
||||
elif isinstance(t, BNode):
|
||||
return t.n3()
|
||||
elif isinstance(t, Literal):
|
||||
return t._literal_n3(qname_callback=namespace_manager.normalizeUri)
|
||||
else:
|
||||
return t.n3()
|
||||
|
||||
|
||||
class TXTResultSerializer(ResultSerializer):
    """
    A write-only QueryResult serializer for text/ascii tables
    """

    def serialize(
        self,
        stream: IO,
        encoding: str = "utf-8",
        *,
        namespace_manager: Optional[NamespaceManager] = None,
        **kwargs,
    ) -> None:
        """
        Write the SELECT result held by this serializer as a text table.

        :param stream: Target stream. Encoded bytes are written first; if the
            stream rejects them with ``TypeError``/``ValueError`` the text is
            written as ``str`` instead.
        :param encoding: Encoding used for the bytes attempt.
        :param namespace_manager: Passed on to ``_termString`` for each cell.
        :raises Exception: If the result is not of type ``"SELECT"``.
        """

        def centered(text, width):
            """Pad ``text`` to ``width``; an odd leftover space goes right."""
            slack = width - len(text)
            left = slack // 2
            right = left + slack % 2
            return " " * left + text + " " * right

        if self.result.type != "SELECT":
            raise Exception("Can only pretty print SELECT results!")

        pieces = []
        if self.result:
            keys: List[Variable] = self.result.vars  # type: ignore[assignment]
            # type error: Value of type "Union[Tuple[Node, Node, Node], bool, ResultRow]" is not indexable
            # type error: Argument 1 to "_termString" has incompatible type "Union[Node, Any]"; expected "Union[URIRef, Literal, BNode, None]"  [arg-type]
            # type error: No overload variant of "__getitem__" of "tuple" matches argument type "Variable"
            # NOTE on type error: The problem here is that r can be more types than _termString expects because result can be a result of multiple types.
            rows = [
                [_termString(solution[key], namespace_manager) for key in keys]  # type: ignore[index, arg-type, call-overload]
                for solution in self.result
            ]

            # Column widths come from the rendered cells only.
            widths = [0] * len(keys)
            for row in rows:
                for column, cell in enumerate(row):
                    widths[column] = max(widths[column], len(cell))

            pieces.append(
                "|".join([centered(key, width) for key, width in zip(keys, widths)])
                + "\n"
            )
            pieces.append("-" * (len(widths) + sum(widths)) + "\n")
            for row in sorted(rows):
                pieces.append(
                    "|".join([cell.ljust(width) for width, cell in zip(widths, row)])
                    + "\n"
                )
        else:
            pieces.append("(no results)\n")

        text_val = "".join(pieces)
        try:
            stream.write(text_val.encode(encoding))
        except (TypeError, ValueError):
            stream.write(text_val)
|
||||
+301
@@ -0,0 +1,301 @@
|
||||
"""A Parser for SPARQL results in XML:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-XMLres/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as xml_etree # noqa: N813
|
||||
from io import BytesIO
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Dict,
|
||||
Optional,
|
||||
Sequence,
|
||||
TextIO,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
from xml.dom import XML_NAMESPACE
|
||||
from xml.sax.saxutils import XMLGenerator
|
||||
from xml.sax.xmlreader import AttributesNSImpl
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
try:
|
||||
# https://adamj.eu/tech/2021/12/29/python-type-hints-optional-imports/
|
||||
import lxml.etree as lxml_etree
|
||||
|
||||
FOUND_LXML = True
|
||||
except ImportError:
|
||||
FOUND_LXML = False
|
||||
|
||||
# Namespace URI used for every element this module reads and writes.
SPARQL_XML_NAMESPACE = "http://www.w3.org/2005/sparql-results#"
# The same namespace wrapped in braces ("{uri}"), ready to be prefixed to a
# local tag name when comparing or searching ElementTree tags.
RESULTS_NS_ET = "{%s}" % SPARQL_XML_NAMESPACE

# Logger named after this module.
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XMLResultParser(ResultParser):
    """Result parser that delegates to :class:`XMLResult`."""

    # TODO FIXME: content_type should be a keyword only arg.
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """
        Build an ``XMLResult`` from ``source``.

        ``content_type`` is accepted but not forwarded.
        """
        parsed = XMLResult(source)
        return parsed
|
||||
|
||||
|
||||
class XMLResult(Result):
    """A ``Result`` filled from a SPARQL results XML document."""

    def __init__(self, source: IO, content_type: Optional[str] = None):
        """
        Parse ``source`` and initialise this result as ASK or SELECT.

        :param source: Stream to read. A stream that has an ``encoding``
            attribute is treated as text: it is read fully and re-encoded as
            utf-8 bytes before parsing.
        :param content_type: Accepted but not used.
        :raises ResultException: If the document has neither a ``boolean``
            nor a ``results`` element.
        """
        parser_encoding: Optional[str] = None
        if not hasattr(source, "encoding"):
            if TYPE_CHECKING:
                assert isinstance(source, BinaryIO)
        else:
            if TYPE_CHECKING:
                assert isinstance(source, TextIO)
            parser_encoding = "utf-8"
            decoded_text = source.read()
            source = BytesIO(decoded_text.encode(parser_encoding))

        if not FOUND_LXML:
            xml_parser = xml_etree.XMLParser(encoding=parser_encoding)
            tree = xml_etree.parse(source, parser=xml_parser)
        else:
            lxml_parser = lxml_etree.XMLParser(huge_tree=True, encoding=parser_encoding)
            tree = cast(
                xml_etree.ElementTree,
                lxml_etree.parse(source, parser=lxml_parser),
            )

        boolean = tree.find(RESULTS_NS_ET + "boolean")
        results = tree.find(RESULTS_NS_ET + "results")

        # A boolean element wins over a results element.
        if boolean is not None:
            type_ = "ASK"
        elif results is not None:
            type_ = "SELECT"
        else:
            raise ResultException("No RDF result-bindings or boolean answer found!")

        super().__init__(type_)

        if type_ != "SELECT":
            self.askAnswer = boolean.text.lower().strip() == "true"  # type: ignore[union-attr]
            return

        result_tag = RESULTS_NS_ET + "result"
        binding_tag = RESULTS_NS_ET + "binding"

        self.bindings = []
        for result in results:  # type: ignore[union-attr]
            # With lxml the children also include comments, so anything that
            # is not a "result" element is skipped.
            if result.tag != result_tag:
                continue
            solution = {}
            for binding in result:
                # Same filtering one level down: only "binding" elements.
                if binding.tag != binding_tag:
                    continue
                # type error: error: Argument 1 to "Variable" has incompatible type "Union[str, None, Any]"; expected "str"
                # NOTE on type error: Element.get() can return None, and
                # this will invariably fail if passed into Variable
                # constructor as value
                solution[Variable(binding.get("name"))] = parseTerm(binding[0])  # type: ignore[arg-type] # FIXME
            self.bindings.append(solution)

        variable_path = "./%shead/%svariable" % (RESULTS_NS_ET, RESULTS_NS_ET)
        self.vars = [
            # type error: Argument 1 to "Variable" has incompatible type "Optional[str]"; expected "str"
            # NOTE on type error: Element.get() can return None, and this
            # will invariably fail if passed into Variable constructor as
            # value
            Variable(declared.get("name"))  # type: ignore[arg-type] # FIXME
            for declared in tree.findall(variable_path)
        ]
|
||||
|
||||
|
||||
def parseTerm(element: xml_etree.Element) -> Union[URIRef, Literal, BNode]:
    """
    Turn a ``uri``, ``bnode`` or ``literal`` element into an rdflib term.

    For a literal, a non-empty ``datatype`` attribute takes precedence over
    an ``xml:lang`` attribute, and missing text is treated as the empty
    string.

    :raises TypeError: If the element's tag is none of the three kinds.
    """
    tag = element.tag
    text = element.text

    if tag == RESULTS_NS_ET + "uri":
        # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str"
        return URIRef(text)  # type: ignore[arg-type]
    if tag == RESULTS_NS_ET + "bnode":
        return BNode(text)
    if tag != RESULTS_NS_ET + "literal":
        raise TypeError("unknown binding type %r" % element)

    datatype = None
    lang = None
    raw_datatype = element.get("datatype", None)
    if raw_datatype:
        datatype = URIRef(raw_datatype)
    else:
        # Only consulted when no datatype is present; an empty value counts
        # as absent.
        lang = element.get("{%s}lang" % XML_NAMESPACE, None) or None

    return Literal("" if text is None else text, datatype=datatype, lang=lang)
|
||||
|
||||
|
||||
class XMLResultSerializer(ResultSerializer):
    """Serializer that writes a result through :class:`SPARQLXMLWriter`."""

    def __init__(self, result: Result):
        super().__init__(result)

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs: Any) -> None:
        """
        Write the held result to ``stream`` as SPARQL results XML.

        An ``"ASK"`` result produces an empty head and the boolean answer;
        any other result produces the variable head followed by one result
        element per binding. Extra keyword arguments are ignored.
        """
        writer = SPARQLXMLWriter(stream, encoding)

        if self.result.type != "ASK":
            # type error: Argument 1 to "write_header" of "SPARQLXMLWriter" has incompatible type "Optional[List[Variable]]"; expected "Sequence[Variable]"
            writer.write_header(self.result.vars)  # type: ignore[arg-type]
            writer.write_results_header()
            for solution in self.result.bindings:
                writer.write_start_result()
                for variable, term in solution.items():
                    writer.write_binding(variable, term)
                writer.write_end_result()
        else:
            writer.write_header([])
            # type error: Argument 1 to "write_ask" of "SPARQLXMLWriter" has incompatible type "Optional[bool]"; expected "bool"
            writer.write_ask(self.result.askAnswer)  # type: ignore[arg-type]

        writer.close()
|
||||
|
||||
|
||||
# TODO: Rewrite with ElementTree?
|
||||
class SPARQLXMLWriter:
    """
    Python saxutils-based SPARQL XML Writer

    The constructor opens the document and the root ``sparql`` element;
    ``close`` ends them. The ``write_*`` methods emit the parts in between.
    """

    def __init__(self, output: IO, encoding: str = "utf-8"):
        """
        Start the XML document on ``output``.

        :param output: Stream handed to ``XMLGenerator``.
        :param encoding: Encoding handed to ``XMLGenerator``.
        """
        writer = XMLGenerator(output, encoding)
        writer.startDocument()
        writer.startPrefixMapping("", SPARQL_XML_NAMESPACE)
        writer.startPrefixMapping("xml", XML_NAMESPACE)
        writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "sparql"), "sparql", AttributesNSImpl({}, {})
        )
        self.writer = writer
        self._output = output
        self._encoding = encoding
        # True once the "results" element is open, so close() knows to end it.
        self._results = False
        # True while a "result" element is open. Initialised here so that the
        # assertions in write_end_result/write_binding fail cleanly instead
        # of raising AttributeError when no result was ever started.
        self._resultStarted = False

    def _start_element(
        self,
        local_name: str,
        attr_vals: Optional[Dict[Tuple[Optional[str], str], str]] = None,
        attr_qnames: Optional[Dict[Tuple[Optional[str], str], str]] = None,
    ) -> None:
        """Open ``local_name`` in the SPARQL results namespace."""
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, local_name),
            local_name,
            # type error: "Dict[Tuple[Optional[str], str], str]" is not the
            # "Mapping[Tuple[str, str], str]" that AttributesNSImpl declares
            AttributesNSImpl(attr_vals or {}, attr_qnames or {}),  # type: ignore[arg-type, unused-ignore]
        )

    def _end_element(self, local_name: str) -> None:
        """Close ``local_name`` in the SPARQL results namespace."""
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, local_name), local_name)

    def write_header(self, allvarsL: Sequence[Variable]) -> None:
        """Write the ``head`` element with one ``variable`` per entry."""
        self._start_element("head")
        for variable in allvarsL:
            self._start_element(
                "variable",
                {(None, "name"): str(variable)},
                {(None, "name"): "name"},
            )
            self._end_element("variable")
        self._end_element("head")

    def write_ask(self, val: bool) -> None:
        """Write the ``boolean`` element holding ``val`` in lower case."""
        self._start_element("boolean")
        self.writer.characters(str(val).lower())
        self._end_element("boolean")

    def write_results_header(self) -> None:
        """Open the ``results`` element; ``close`` ends it."""
        self._start_element("results")
        self._results = True

    def write_start_result(self) -> None:
        """Open a ``result`` element."""
        self._start_element("result")
        self._resultStarted = True

    def write_end_result(self) -> None:
        """Close the ``result`` element opened by ``write_start_result``."""
        assert self._resultStarted
        self._end_element("result")
        self._resultStarted = False

    def write_binding(self, name: Variable, val: Identifier) -> None:
        """
        Write one ``binding`` element inside the currently open result.

        :param name: Variable name, written as the ``name`` attribute.
        :param val: A URIRef, BNode or Literal. For a literal, a language
            takes precedence over a datatype.
        :raises Exception: If ``val`` is none of the three supported kinds.
        """
        assert self._resultStarted

        self._start_element(
            "binding", {(None, "name"): str(name)}, {(None, "name"): "name"}
        )

        if isinstance(val, URIRef):
            self._start_element("uri")
            self.writer.characters(val)
            self._end_element("uri")
        elif isinstance(val, BNode):
            self._start_element("bnode")
            self.writer.characters(val)
            self._end_element("bnode")
        elif isinstance(val, Literal):
            attr_vals: Dict[Tuple[Optional[str], str], str] = {}
            attr_qnames: Dict[Tuple[Optional[str], str], str] = {}
            if val.language:
                attr_vals[(XML_NAMESPACE, "lang")] = val.language
                attr_qnames[(XML_NAMESPACE, "lang")] = "xml:lang"
            elif val.datatype:
                attr_vals[(None, "datatype")] = val.datatype
                attr_qnames[(None, "datatype")] = "datatype"

            self._start_element("literal", attr_vals, attr_qnames)
            self.writer.characters(val)
            self._end_element("literal")
        else:
            raise Exception("Unsupported RDF term: %s" % val)

        self._end_element("binding")

    def close(self) -> None:
        """End ``results`` (if it was opened), the root and the document."""
        if self._results:
            self._end_element("results")
        self._end_element("sparql")
        self.writer.endDocument()
|
||||
@@ -0,0 +1,499 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import itertools
|
||||
import typing as t
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Container,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
import rdflib.plugins.sparql
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
|
||||
_AnyT = TypeVar("_AnyT")
|
||||
|
||||
|
||||
class SPARQLError(Exception):
    """Base class of the SPARQL exceptions defined in this module."""

    def __init__(self, msg: Optional[str] = None):
        super().__init__(msg)


class NotBoundError(SPARQLError):
    """A ``SPARQLError`` with an optional message."""

    def __init__(self, msg: Optional[str] = None):
        super().__init__(msg)


class AlreadyBound(SPARQLError):  # noqa: N818
    """Raised when trying to bind a variable that is already bound!"""

    def __init__(self):
        super().__init__()


class SPARQLTypeError(SPARQLError):
    """A ``SPARQLError`` whose message argument is required."""

    def __init__(self, msg: Optional[str]):
        super().__init__(msg)
|
||||
|
||||
|
||||
class Bindings(MutableMapping):
    """
    A single level of a stack of variable-value bindings.

    Each level keeps a reference to the level below it (``outer``); any
    lookup that fails locally is propagated to that outer level.

    In python 3.3 this could be a collections.ChainMap
    """

    def __init__(self, outer: Optional[Bindings] = None, d=()):
        """
        :param outer: The level below this one, or ``None`` for the bottom.
        :param d: Initial bindings for this level - anything ``dict()``
            accepts. The default is an immutable empty tuple rather than a
            shared mutable list.
        """
        self._d: Dict[str, str] = dict(d)
        self.outer = outer

    def __getitem__(self, key: str) -> str:
        """Return the value for ``key`` from the nearest level that has it.

        :raises KeyError: If no level binds ``key``.
        """
        if key in self._d:
            return self._d[key]

        # Compare against None explicitly: testing the truthiness of the
        # outer mapping would call its __len__ and walk the whole chain.
        if self.outer is None:
            raise KeyError(key)
        return self.outer[key]

    def __contains__(self, key: Any) -> bool:
        try:
            self[key]
        except KeyError:
            return False
        return True

    def __setitem__(self, key: str, value: Any) -> None:
        """Bind ``key`` on this level only; outer levels are not touched."""
        self._d[key] = value

    def __delitem__(self, key: str) -> None:
        raise Exception("DelItem is not implemented!")

    def __len__(self) -> int:
        """Sum of the sizes of all levels (a shadowed key counts per level)."""
        total = 0
        level: Optional[Bindings] = self
        while level is not None:
            total += len(level._d)
            level = level.outer
        return total

    def __iter__(self) -> Generator[str, None, None]:
        """Yield the keys of this level, then those of each outer level."""
        level: Optional[Bindings] = self
        while level is not None:
            yield from level._d
            level = level.outer

    def __str__(self) -> str:
        # Collapse shadowed keys into one entry showing the visible value.
        # The previous implementation passed (key, value) tuples to
        # str.join, which raises TypeError for any non-empty mapping.
        visible = {key: self[key] for key in self}
        return (
            "Bindings({"
            + ", ".join(f"{key!r}: {value!r}" for key, value in visible.items())
            + "})"
        )

    def __repr__(self) -> str:
        return str(self)
|
||||
|
||||
|
||||
class FrozenDict(Mapping):
    """
    An immutable hashable dict

    Taken from http://stackoverflow.com/a/2704866/81121
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """Accept the same arguments as ``dict``."""
        self._hash: Optional[int] = None
        self._d: Dict[Identifier, Identifier] = dict(*args, **kwargs)

    def __iter__(self):
        return iter(self._d)

    def __len__(self) -> int:
        return len(self._d)

    def __getitem__(self, key: Identifier) -> Identifier:
        return self._d[key]

    def __hash__(self) -> int:
        # XOR of the hashes of all keys and values: order independent and
        # linear in the number of items (no sorting needed). The value is
        # computed on first use and cached.
        if self._hash is not None:
            return self._hash
        self._hash = 0
        for key, value in self.items():
            self._hash ^= hash(key) ^ hash(value)
        return self._hash

    def project(self, vars: Container[Variable]) -> FrozenDict:
        """Return a copy restricted to the keys contained in ``vars``."""
        return FrozenDict((key, value) for key, value in self.items() if key in vars)

    def disjointDomain(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return ``True`` if ``self`` and ``other`` share no key."""
        return set(self).isdisjoint(other)

    def compatible(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return ``False`` if any key present in both maps to different
        values; keys missing from ``other`` are ignored."""
        for key in self:
            try:
                differs = self[key] != other[key]
            except KeyError:
                continue
            if differs:
                return False
        return True

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenDict:
        """Return a new dict of both item sets; ``other`` wins on clashes."""
        return FrozenDict(itertools.chain(self.items(), other.items()))

    def __str__(self) -> str:
        return str(self._d)

    def __repr__(self) -> str:
        return repr(self._d)
|
||||
|
||||
|
||||
class FrozenBindings(FrozenDict):
    """A ``FrozenDict`` of bindings that also carries its query context."""

    def __init__(self, ctx: QueryContext, *args, **kwargs):
        """
        :param ctx: The query context these bindings belong to.
        :param args: Forwarded to ``FrozenDict``.
        :param kwargs: Forwarded to ``FrozenDict``.
        """
        super().__init__(*args, **kwargs)
        self.ctx = ctx

    def __getitem__(self, key: Union[Identifier, str]) -> Identifier:
        """
        Resolve ``key`` against these bindings.

        A key that is not a ``Node`` is first turned into a ``Variable``.
        A key that is neither a blank node nor a variable is returned as-is.
        Otherwise the bound value is returned, falling back to the context's
        ``initBindings`` when the key is not bound here.
        """
        if not isinstance(key, Node):
            key = Variable(key)

        if not isinstance(key, (BNode, Variable)):
            return key

        if key in self._d:
            return self._d[key]
        # type error: Value of type "Optional[Dict[Variable, Identifier]]" is not indexable
        # type error: Invalid index type "Union[BNode, Variable]" for "Optional[Dict[Variable, Identifier]]"; expected type "Variable"
        return self.ctx.initBindings[key]  # type: ignore[index]

    def project(self, vars: Container[Variable]) -> FrozenBindings:
        """Return the bindings whose key is contained in ``vars``."""
        return FrozenBindings(
            self.ctx, ((key, value) for key, value in self.items() if key in vars)
        )

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenBindings:
        """Return both item sets combined; ``other`` wins on clashes."""
        return FrozenBindings(self.ctx, itertools.chain(self.items(), other.items()))

    @property
    def now(self) -> datetime.datetime:
        """The ``now`` of the query context."""
        return self.ctx.now

    @property
    def bnodes(self) -> t.Mapping[Identifier, BNode]:
        """The ``bnodes`` mapping of the query context."""
        return self.ctx.bnodes

    @property
    def prologue(self) -> Optional[Prologue]:
        """The ``prologue`` of the query context."""
        return self.ctx.prologue

    def forget(
        self, before: QueryContext, _except: Optional[Container[Variable]] = None
    ) -> FrozenBindings:
        """
        return a frozen dict only of bindings made in self
        since before

        Bindings listed in ``_except`` are kept as well.
        """
        if not _except:
            _except = []

        def keep(variable) -> bool:
            # bindings from initBindings are never forgotten
            return (
                variable in _except
                # type error: Unsupported right operand type for in ("Optional[Dict[Variable, Identifier]]")
                or variable in self.ctx.initBindings  # type: ignore[operator]
                or before[variable] is None
            )

        return FrozenBindings(
            self.ctx, (item for item in self.items() if keep(item[0]))
        )

    def remember(self, these) -> FrozenBindings:
        """
        return a frozen dict only of bindings in these
        """
        return FrozenBindings(
            self.ctx, ((key, value) for key, value in self.items() if key in these)
        )
|
||||
|
||||
|
||||
class QueryContext:
    """
    Query context - passed along when evaluating the query
    """

    def __init__(
        self,
        graph: Optional[Graph] = None,
        bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
        initBindings: Optional[Mapping[str, Identifier]] = None,
        datasetClause=None,
    ):
        """
        :param graph: Graph to query. A ``Dataset``/``ConjunctiveGraph`` is
            kept as the dataset; anything else is used as the only graph.
        :param bindings: Initial content of the binding stack.
        :param initBindings: Bindings that are also copied into the stack.
        :param datasetClause: When given together with a dataset, a fresh
            dataset and default graph are filled from its entries.
        """
        self.initBindings = initBindings
        self.bindings = Bindings(d=bindings or [])
        if initBindings:
            self.bindings.update(initBindings)

        self.graph: Optional[Graph]
        self._dataset: Optional[Union[Dataset, ConjunctiveGraph]]
        if not isinstance(graph, (Dataset, ConjunctiveGraph)):
            self._dataset = None
            self.graph = graph
        elif not datasetClause:
            self._dataset = graph
            if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
                self.graph = self.dataset
            else:
                self.graph = self.dataset.default_context
        else:
            self._dataset = Dataset()
            self.graph = Graph()
            for clause in datasetClause:
                if clause.default:
                    known_default = graph.get_context(clause.default)
                    self.graph += known_default
                    # Nothing known locally under that name: try loading it.
                    if not known_default:
                        self.load(clause.default, default=True)
                elif clause.named:
                    named_target = Graph(
                        store=self.dataset.store, identifier=clause.named
                    )
                    known_named = graph.get_context(clause.named)
                    named_target += known_named
                    if not known_named:
                        self.load(clause.named, default=False)

        self.prologue: Optional[Prologue] = None
        self._now: Optional[datetime.datetime] = None

        self.bnodes: t.MutableMapping[Identifier, BNode] = collections.defaultdict(
            BNode
        )

    @property
    def now(self) -> datetime.datetime:
        """UTC timestamp taken on first access and then kept fixed."""
        if self._now is None:
            self._now = datetime.datetime.now(datetime.timezone.utc)
        return self._now

    def clone(
        self, bindings: Optional[Union[FrozenBindings, Bindings, List[Any]]] = None
    ) -> QueryContext:
        """
        Return a new context sharing this one's prologue, graph and bnodes.

        NOTE: a falsy ``bindings`` argument (including an empty list) falls
        back to the current bindings.
        """
        base_graph = self._dataset if self._dataset is not None else self.graph
        copy = QueryContext(
            base_graph,
            bindings or self.bindings,
            initBindings=self.initBindings,
        )
        copy.prologue = self.prologue
        copy.graph = self.graph
        copy.bnodes = self.bnodes
        return copy

    @property
    def dataset(self) -> ConjunctiveGraph:
        """The current dataset.

        :raises Exception: If this context works on a single graph.
        """
        if self._dataset is None:
            raise Exception(
                "You performed a query operation requiring "
                + "a dataset (i.e. ConjunctiveGraph), but "
                + "operating currently on a single graph."
            )
        return self._dataset

    def load(
        self,
        source: URIRef,
        default: bool = False,
        into: Optional[Identifier] = None,
        **kwargs: Any,
    ) -> None:
        """
        Load data from the source into the query context.

        :param source: The source to load from.
        :param default: If `True`, triples from the source will be added
            to the default graph, otherwise it will be loaded into a
            graph with ``source`` URI as its name.
        :param into: The name of the graph to load the data into. If
            `None`, the source URI will be used as the name of the
            graph.
        :param kwargs: Keyword arguments to pass to
            :meth:`rdflib.graph.Graph.parse`.
        """

        def _load(graph, source):
            # Try the formats in a fixed order; the first that parses wins.
            for fmt in ("turtle", "xml", "n3"):
                try:
                    return graph.parse(source, format=fmt, **kwargs)
                except Exception:
                    pass
            try:
                return graph.parse(source, format="nt", **kwargs)
            except Exception:
                raise Exception(
                    "Could not load %s as either RDF/XML, N3 or NTriples" % source
                )

        if not rdflib.plugins.sparql.SPARQL_LOAD_GRAPHS:
            # we are not loading - if we already know the graph
            # being "loaded", just add it to the default-graph
            if default:
                # Unsupported left operand type for + ("None")
                self.graph += self.dataset.get_context(source)  # type: ignore[operator]
            return

        if default:
            _load(self.graph, source)
            return

        if into is None:
            into = source
        _load(self.dataset.get_context(into), source)

    def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
        """Return the binding of a blank node or variable (``None`` when
        unbound); any other key is returned unchanged."""
        # in SPARQL BNodes are just labels
        if isinstance(key, (BNode, Variable)):
            try:
                return self.bindings[key]
            except KeyError:
                return None
        return key

    def get(self, key: str, default: Optional[Any] = None) -> Any:
        """Return ``self[key]``, or ``default`` if that raises ``KeyError``."""
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def solution(self, vars: Optional[Iterable[Variable]] = None) -> FrozenBindings:
        """
        Return a static copy of the current variable bindings as dict
        """
        if not vars:
            return FrozenBindings(self, self.bindings.items())
        return FrozenBindings(
            self, ((k, v) for k, v in self.bindings.items() if k in vars)
        )

    def __setitem__(self, key: str, value: str) -> None:
        """Bind ``key``.

        :raises AlreadyBound: If ``key`` is bound to a different value.
        """
        if key in self.bindings and self.bindings[key] != value:
            raise AlreadyBound()
        self.bindings[key] = value

    def pushGraph(self, graph: Optional[Graph]) -> QueryContext:
        """Return a clone whose graph is ``graph``."""
        pushed = self.clone()
        pushed.graph = graph
        return pushed

    def push(self) -> QueryContext:
        """Return a clone with a new binding level on top of the current."""
        return self.clone(Bindings(self.bindings))

    def clean(self) -> QueryContext:
        """Return ``self.clone([])``."""
        return self.clone([])

    def thaw(self, frozenbindings: FrozenBindings) -> QueryContext:
        """
        Create a new read/write query context from the given solution
        """
        return self.clone(frozenbindings)
|
||||
|
||||
|
||||
class Prologue:
    """
    Holds the prefix bindings and base URI that apply to a query or update.
    """

    def __init__(self) -> None:
        # the namespace manager needs a store, so hand it a fresh Graph
        self.namespace_manager = NamespaceManager(Graph())
        self.base: Optional[str] = None

    def resolvePName(self, prefix: Optional[str], localname: Optional[str]) -> URIRef:
        """
        Expand a prefixed name into a ``URIRef``.

        A missing prefix or local name is treated as the empty string.

        :raises Exception: if the prefix is not bound to a namespace
        """
        namespace = self.namespace_manager.store.namespace(prefix or "")
        if namespace is not None:
            return URIRef(namespace + (localname or ""))
        raise Exception("Unknown namespace prefix : %s" % prefix)

    def bind(self, prefix: Optional[str], uri: Any) -> None:
        """Bind ``prefix`` to ``uri``, replacing any existing binding."""
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(
        self, iri: Optional[Union[CompValue, str]]
    ) -> Optional[Union[CompValue, str]]:
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        ``pname`` values are resolved through :meth:`resolvePName`,
        ``literal`` values are rebuilt with an absolutized datatype, and a
        ``URIRef`` without a ``:`` is resolved against ``self.base``.
        Everything else is returned untouched.

        TODO: Move resolving URIs to pre-processing
        """
        if not isinstance(iri, CompValue):
            if isinstance(iri, URIRef) and ":" not in iri:
                return URIRef(iri, base=self.base)
            return iri

        if iri.name == "pname":
            return self.resolvePName(iri.prefix, iri.localname)

        if iri.name == "literal":
            # type error: Argument "datatype" to "Literal" has incompatible type "Union[CompValue, Identifier, None]"; expected "Optional[str]"
            resolved_datatype = self.absolutize(iri.datatype)
            return Literal(
                iri.string, lang=iri.lang, datatype=resolved_datatype  # type: ignore[arg-type]
            )

        return iri
|
||||
|
||||
|
||||
class Query:
    """
    A parsed and translated query: its prologue plus the algebra tree.
    """

    def __init__(self, prologue: Prologue, algebra: CompValue):
        # declared for type checkers only; no value is assigned here
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
        self.algebra = algebra
        self.prologue = prologue
|
||||
|
||||
|
||||
class Update:
    """
    A parsed and translated update: its prologue plus the list of
    operations in ``algebra``.
    """

    def __init__(self, prologue: Prologue, algebra: List[CompValue]):
        # declared for type checkers only; no value is assigned here
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
        self.algebra = algebra
        self.prologue = prologue
|
||||
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
|
||||
Code for carrying out Update Operations
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator, Mapping, Optional, Sequence
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.evaluate import evalBGP, evalPart
|
||||
from rdflib.plugins.sparql.evalutils import _fillTemplate, _join
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenDict, QueryContext, Update
|
||||
from rdflib.term import Identifier, URIRef, Variable
|
||||
|
||||
|
||||
def _graphOrDefault(ctx: QueryContext, g: str) -> Optional[Graph]:
    """
    Return ``ctx.graph`` for the keyword ``"DEFAULT"``, otherwise the
    dataset context identified by ``g``.
    """
    return ctx.graph if g == "DEFAULT" else ctx.dataset.get_context(g)
|
||||
|
||||
|
||||
def _graphAll(ctx: QueryContext, g: str) -> Sequence[Graph]:
    """
    Return the list of graphs selected by ``g``.

    ``"DEFAULT"`` selects ``ctx.graph``, ``"NAMED"`` every dataset context
    whose identifier differs from ``ctx.graph``'s, ``"ALL"`` every dataset
    context; any other value is looked up as a single context.
    """
    if g == "DEFAULT":
        # type error: List item 0 has incompatible type "Optional[Graph]"; expected "Graph"
        return [ctx.graph]  # type: ignore[list-item]

    if g == "NAMED":
        named = []
        for candidate in ctx.dataset.contexts():
            # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
            if candidate.identifier != ctx.graph.identifier:  # type: ignore[union-attr]
                named.append(candidate)
        return named

    if g == "ALL":
        return list(ctx.dataset.contexts())

    return [ctx.dataset.get_context(g)]
|
||||
|
||||
|
||||
def evalLoad(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a LOAD operation.

    Loads ``u.iri`` into ``u.graphiri`` when one is given, otherwise into
    the default graph.

    http://www.w3.org/TR/sparql11-update/#load
    """
    if TYPE_CHECKING:
        assert isinstance(u.iri, URIRef)

    if not u.graphiri:
        ctx.load(u.iri, default=True)
        return

    ctx.load(u.iri, default=False, into=u.graphiri)
|
||||
|
||||
|
||||
def evalCreate(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a CREATE operation.

    Always raises: a non-empty target graph is reported as already
    existing, anything else as not implemented.

    http://www.w3.org/TR/sparql11-update/#create
    """
    target = ctx.dataset.get_context(u.graphiri)
    if len(target) > 0:
        raise Exception("Graph %s already exists." % target.identifier)
    raise Exception("Create not implemented!")
|
||||
|
||||
|
||||
def evalClear(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a CLEAR operation by removing every triple from each graph
    selected by ``u.graphiri``.

    http://www.w3.org/TR/sparql11-update/#clear
    """
    everything = (None, None, None)
    for graph in _graphAll(ctx, u.graphiri):
        graph.remove(everything)
|
||||
|
||||
|
||||
def evalDrop(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a DROP operation.

    On a graph-aware store each selected graph is removed from the store;
    otherwise the operation falls back to clearing the graphs.

    http://www.w3.org/TR/sparql11-update/#drop
    """
    if not ctx.dataset.store.graph_aware:
        evalClear(ctx, u)
        return

    for graph in _graphAll(ctx, u.graphiri):
        ctx.dataset.store.remove_graph(graph)
|
||||
|
||||
|
||||
def evalInsertData(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate an INSERT DATA operation.

    ``u.triples`` go into the context's graph; ``u.quads`` maps a graph
    identifier to the triples to add to that dataset context.

    http://www.w3.org/TR/sparql11-update/#insertData
    """
    # triples without an explicit graph
    default_graph = ctx.graph
    default_graph += u.triples

    # quads, grouped as graphURI => [triples]
    for graph_iri, triples in u.quads.items():
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]"
        target = ctx.dataset.get_context(graph_iri)  # type: ignore[arg-type]
        target += triples
|
||||
|
||||
|
||||
def evalDeleteData(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a DELETE DATA operation.

    ``u.triples`` are removed from the context's graph; ``u.quads`` maps a
    graph identifier to the triples to remove from that dataset context.

    http://www.w3.org/TR/sparql11-update/#deleteData
    """
    # triples without an explicit graph
    default_graph = ctx.graph
    default_graph -= u.triples

    # quads, grouped as graphURI => [triples]
    for graph_iri, triples in u.quads.items():
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]"
        target = ctx.dataset.get_context(graph_iri)  # type: ignore[arg-type]
        target -= triples
|
||||
|
||||
|
||||
def evalDeleteWhere(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a DELETE WHERE operation.

    The triple and quad patterns are matched first; each resulting solution
    is then used to instantiate the same patterns as the data to delete.

    http://www.w3.org/TR/sparql11-update/#deleteWhere
    """
    solutions: Iterator[FrozenDict] = evalBGP(ctx, u.triples)
    for graph_term in u.quads:
        named_graph = ctx.dataset.get_context(graph_term)
        named_ctx = ctx.pushGraph(named_graph)
        solutions = _join(solutions, list(evalBGP(named_ctx, u.quads[graph_term])))

    for solution in solutions:
        default_graph = ctx.graph
        default_graph -= _fillTemplate(u.triples, solution)

        for graph_term in u.quads:
            # the graph term is resolved through the solution before lookup
            target = ctx.dataset.get_context(solution.get(graph_term))
            target -= _fillTemplate(u.quads[graph_term], solution)
|
||||
|
||||
|
||||
def evalModify(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a DELETE/INSERT ... WHERE operation.

    The WHERE clause is evaluated against the dataset described by the
    USING clauses (or the WITH graph when there are none); the delete and
    insert templates are then filled in for every solution.
    """
    original_ctx = ctx
    replaced_default = False

    dg: Optional[Graph]
    if u.using:
        # USING replaces the dataset for evaluating the where-clause
        for clause in u.using:
            if clause.default:
                if not replaced_default:
                    # replace current default graph
                    dg = Graph()
                    ctx = ctx.pushGraph(dg)
                    replaced_default = True

                ctx.load(clause.default, default=True)

            elif clause.named:
                ctx.load(clause.named, default=False)
    elif u.withClause:
        # "The WITH clause provides a convenience for when an operation
        # primarily refers to a single graph. If a graph name is specified
        # in a WITH clause, then - for the purposes of evaluating the
        # WHERE clause - this will define an RDF Dataset containing a
        # default graph with the specified name, but only in the absence
        # of USING or USING NAMED clauses. In the presence of one or more
        # graphs referred to in USING clauses and/or USING NAMED clauses,
        # the WITH clause will be ignored while evaluating the WHERE
        # clause."
        with_graph = ctx.dataset.get_context(u.withClause)
        ctx = ctx.pushGraph(with_graph)

    res = evalPart(ctx, u.where)

    if u.using:
        if replaced_default:
            ctx = original_ctx  # restore original default graph
        if u.withClause:
            with_graph = ctx.dataset.get_context(u.withClause)
            ctx = ctx.pushGraph(with_graph)

    for solution in res:
        dg = ctx.graph
        if u.delete:
            # type error: Unsupported left operand type for - ("None")
            # type error: Unsupported operand types for - ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]")
            dg -= _fillTemplate(u.delete.triples, solution)  # type: ignore[operator]

            for graph_term, template in u.delete.quads.items():
                target = ctx.dataset.get_context(solution.get(graph_term))
                target -= _fillTemplate(template, solution)

        if u.insert:
            # type error: Unsupported left operand type for + ("None")
            # type error: Unsupported operand types for + ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]")
            dg += _fillTemplate(u.insert.triples, solution)  # type: ignore[operator]

            for graph_term, template in u.insert.quads.items():
                target = ctx.dataset.get_context(solution.get(graph_term))
                target += _fillTemplate(template, solution)
|
||||
|
||||
|
||||
def evalAdd(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate an ADD operation: add all triples from src to dst.

    Nothing happens when both names resolve to graphs with the same
    identifier.

    http://www.w3.org/TR/sparql11-update/#add
    """
    src, dst = u.graph
    source = _graphOrDefault(ctx, src)
    target = _graphOrDefault(ctx, dst)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier != target.identifier:  # type: ignore[union-attr]
        # type error: Unsupported left operand type for + ("None")
        target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalMove(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a MOVE operation:

    remove all triples from dst
    add all triples from src to dst
    remove all triples from src

    Nothing happens when both names resolve to graphs with the same
    identifier.

    http://www.w3.org/TR/sparql11-update/#move
    """
    src, dst = u.graph
    source = _graphOrDefault(ctx, src)
    target = _graphOrDefault(ctx, dst)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    everything = (None, None, None)

    # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
    target.remove(everything)  # type: ignore[union-attr]

    # type error: Unsupported left operand type for + ("None")
    target += source  # type: ignore[operator]

    if not ctx.dataset.store.graph_aware:
        # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
        source.remove(everything)  # type: ignore[union-attr]
        return

    # type error: Argument 1 to "remove_graph" of "Store" has incompatible type "Optional[Graph]"; expected "Graph"
    ctx.dataset.store.remove_graph(source)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def evalCopy(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate a COPY operation:

    remove all triples from dst
    add all triples from src to dst

    Nothing happens when both names resolve to graphs with the same
    identifier.

    http://www.w3.org/TR/sparql11-update/#copy
    """
    src, dst = u.graph
    source = _graphOrDefault(ctx, src)
    target = _graphOrDefault(ctx, dst)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
    target.remove((None, None, None))  # type: ignore[union-attr]

    # type error: Unsupported left operand type for + ("None")
    target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalUpdate(
    graph: Graph,
    update: Update,
    initBindings: Optional[Mapping[str, Identifier]] = None,
) -> None:
    """
    Run every operation of a parsed update request against ``graph``.

    http://www.w3.org/TR/sparql11-update/#updateLanguage

    'A request is a sequence of operations [...] Implementations MUST
    ensure that operations of a single request are executed in a
    fashion that guarantees the same effects as executing them in
    lexical order.

    Operations all result either in success or failure.

    If multiple operations are present in a single request, then a
    result of failure from any operation MUST abort the sequence of
    operations, causing the subsequent operations to be ignored.'

    This will return None on success and raise Exceptions on error

    :param graph: the graph (or dataset) the update is applied to
    :param update: the translated update whose ``algebra`` lists the operations
    :param initBindings: optional initial variable bindings; keys are
        converted to ``Variable``
    :raises Exception: whatever a failing operation raises, unless that
        operation is marked ``silent``; also for an unknown operation name

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.

    """
    handlers = {
        "Load": evalLoad,
        "Clear": evalClear,
        "Drop": evalDrop,
        "Create": evalCreate,
        "Add": evalAdd,
        "Move": evalMove,
        "Copy": evalCopy,
        "InsertData": evalInsertData,
        "DeleteData": evalDeleteData,
        "DeleteWhere": evalDeleteWhere,
        "Modify": evalModify,
    }

    # The conversion does not depend on the operation, so do it once.
    bindings = {Variable(k): v for k, v in (initBindings or {}).items()}

    for u in update.algebra:
        # every operation still receives its own dict, as before
        ctx = QueryContext(graph, initBindings=dict(bindings))
        ctx.prologue = u.prologue

        try:
            handler = handlers.get(u.name)
            if handler is None:
                raise Exception("Unknown update operation: %s" % (u,))
            handler(ctx, u)
        except Exception:
            # SILENT suppresses operation failures only; KeyboardInterrupt
            # and SystemExit are not Exception subclasses and always
            # propagate.
            if not u.silent:
                raise
|
||||
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
This package contains modules for additional RDFLib stores
|
||||
"""
|
||||
@@ -0,0 +1,199 @@
|
||||
"""
|
||||
|
||||
This wrapper intercepts calls through the store interface and implements
|
||||
thread-safe logging of destructive operations (adds / removes) in reverse.
|
||||
This is persisted on the store instance and the reverse operations are
|
||||
executed in order to return the store to the state it was in when the transaction
|
||||
began. Since the reverse operations are persisted on the store, the store
|
||||
itself acts as a transaction.
|
||||
|
||||
Calls to commit or rollback flush the list of reverse operations. This
|
||||
provides thread-safe atomicity and isolation (assuming concurrent operations
|
||||
occur with different store instances), but no durability (transactions are
|
||||
persisted in memory and won't be available to reverse operations after the
|
||||
system fails): A and I out of ACID.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import TYPE_CHECKING, Any, Generator, Iterator, List, Optional, Tuple
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.store import Store
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import (
|
||||
_ContextIdentifierType,
|
||||
_ContextType,
|
||||
_ObjectType,
|
||||
_PredicateType,
|
||||
_SubjectType,
|
||||
_TriplePatternType,
|
||||
_TripleType,
|
||||
)
|
||||
from rdflib.query import Result
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
# Optional lock per destructive operation; both entries start out as None.
destructiveOpLocks = dict.fromkeys(("add", "remove"))  # noqa: N816
|
||||
|
||||
|
||||
class AuditableStore(Store):
    """
    Store wrapper that logs, for every add/remove it forwards, the
    operation needed to undo it.

    ``commit`` discards the log; ``rollback`` replays it against the
    wrapped store and then discards it.
    """

    def __init__(self, store: Store):
        """
        :param store: the store every call is forwarded to
        """
        self.store = store
        self.context_aware = store.context_aware
        # NOTE: this store can't be formula_aware as it doesn't have enough
        # info to reverse the removal of a quoted statement
        self.formula_aware = False  # store.formula_aware
        self.transaction_aware = True  # This is only half true
        # entries are (subject, predicate, object, context id, "add" | "remove")
        self.reverseOps: List[
            Tuple[
                Optional[_SubjectType],
                Optional[_PredicateType],
                Optional[_ObjectType],
                Optional[_ContextIdentifierType],
                str,
            ]
        ] = []
        self.rollbackLock = threading.RLock()

    def _rebind(self, context: Optional[_ContextType]) -> Optional[_ContextType]:
        """Re-create ``context`` (same class, same identifier) on the wrapped store."""
        if context is None:
            return None
        return context.__class__(self.store, context.identifier)

    def _log_removal(self, s: Any, p: Any, o: Any, ctx_id: Any) -> None:
        """Cancel a pending "remove" reverse-op for this quad, else log an "add"."""
        try:
            self.reverseOps.remove((s, p, o, ctx_id, "remove"))
        except ValueError:
            self.reverseOps.append((s, p, o, ctx_id, "add"))

    def open(self, configuration: str, create: bool = True) -> Optional[int]:
        return self.store.open(configuration, create)

    def close(self, commit_pending_transaction: bool = False) -> None:
        # forward the flag instead of silently dropping it
        self.store.close(commit_pending_transaction=commit_pending_transaction)

    def destroy(self, configuration: str) -> None:
        self.store.destroy(configuration)

    def query(self, *args: Any, **kw: Any) -> Result:
        return self.store.query(*args, **kw)

    def add(
        self, triple: _TripleType, context: _ContextType, quoted: bool = False
    ) -> None:
        """
        Add ``triple`` to the wrapped store and log how to undo it.

        A triple that is already present is left alone and nothing is
        logged. If the log holds a pending "add" for the same quad (it was
        removed earlier in this transaction) the two cancel out; otherwise
        a "remove" reverse-op is logged. Logging the "remove" in the cancel
        case as well would make ``rollback`` delete a triple that existed
        before the transaction started.
        """
        (s, p, o) = triple
        # NOTE(review): while the module-level entry is None a new RLock is
        # created on every call, so this does not exclude other threads.
        lock = destructiveOpLocks["add"] or threading.RLock()
        with lock:
            context = self._rebind(context)
            ctxId = context.identifier if context is not None else None  # noqa: N806
            if list(self.store.triples(triple, context)):
                return  # triple already in store, do nothing
            try:
                self.reverseOps.remove((s, p, o, ctxId, "add"))
            except ValueError:
                self.reverseOps.append((s, p, o, ctxId, "remove"))
            self.store.add((s, p, o), context, quoted)

    def remove(
        self, spo: _TriplePatternType, context: Optional[_ContextType] = None
    ) -> None:
        """
        Remove everything matching ``spo`` from the wrapped store and log
        how to undo it.

        With a wildcard in the pattern or no context, the matching triples
        (or quads, when there is no context identifier) are enumerated first
        so that each one gets its own reverse-op.
        """
        subject, predicate, object_ = spo
        # NOTE(review): see add() - the fallback lock is created per call.
        lock = destructiveOpLocks["remove"] or threading.RLock()
        with lock:
            # Need to determine which quads will be removed if any term is a
            # wildcard
            context = self._rebind(context)
            ctxId = context.identifier if context is not None else None  # noqa: N806
            if None in [subject, predicate, object_, context]:
                if ctxId:
                    # type error: Item "None" of "Optional[Graph]" has no attribute "triples"
                    for s, p, o in context.triples((subject, predicate, object_)):  # type: ignore[union-attr]
                        self._log_removal(s, p, o, ctxId)
                else:
                    for s, p, o, ctx in ConjunctiveGraph(self.store).quads(
                        (subject, predicate, object_)
                    ):
                        # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
                        self._log_removal(s, p, o, ctx.identifier)  # type: ignore[union-attr]
            else:
                if not list(self.triples((subject, predicate, object_), context)):
                    return  # triple not present in store, do nothing
                self._log_removal(subject, predicate, object_, ctxId)
            self.store.remove((subject, predicate, object_), context)

    def triples(
        self, triple: _TriplePatternType, context: Optional[_ContextType] = None
    ) -> Iterator[Tuple[_TripleType, Iterator[Optional[_ContextType]]]]:
        (su, pr, ob) = triple
        context = self._rebind(context)
        for (s, p, o), cg in self.store.triples((su, pr, ob), context):
            yield (s, p, o), cg

    def __len__(self, context: Optional[_ContextType] = None):
        return self.store.__len__(self._rebind(context))

    def contexts(
        self, triple: Optional[_TripleType] = None
    ) -> Generator[_ContextType, None, None]:
        yield from self.store.contexts(triple)

    def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None:
        self.store.bind(prefix, namespace, override=override)

    def prefix(self, namespace: URIRef) -> Optional[str]:
        return self.store.prefix(namespace)

    def namespace(self, prefix: str) -> Optional[URIRef]:
        return self.store.namespace(prefix)

    def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
        return self.store.namespaces()

    def commit(self) -> None:
        """Accept all changes made so far by discarding the reverse-op log."""
        self.reverseOps = []

    def rollback(self) -> None:
        """Undo all logged changes on the wrapped store, then clear the log."""
        # Acquire Rollback lock and apply reverse operations in the forward
        # order
        with self.rollbackLock:
            for subject, predicate, obj, context, op in self.reverseOps:
                if op == "add":
                    # type error: Argument 2 to "Graph" has incompatible type "Optional[Node]"; expected "Union[IdentifiedNode, str, None]"
                    self.store.add(
                        (subject, predicate, obj), Graph(self.store, context)  # type: ignore[arg-type]
                    )
                else:
                    self.store.remove(
                        (subject, predicate, obj), Graph(self.store, context)
                    )

            self.reverseOps = []
|
||||
@@ -0,0 +1,775 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from os import mkdir
|
||||
from os.path import abspath, exists
|
||||
from threading import Thread
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Tuple
|
||||
from urllib.request import pathname2url
|
||||
|
||||
from rdflib.store import NO_STORE, VALID_STORE, Store
|
||||
from rdflib.term import Identifier, Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import Graph, _ContextType, _TriplePatternType, _TripleType
|
||||
|
||||
|
||||
def bb(u: str) -> bytes:
    """Return ``u`` encoded as UTF-8 bytes."""
    encoded = u.encode("utf-8")
    return encoded
|
||||
|
||||
|
||||
try:
|
||||
from berkeleydb import db
|
||||
|
||||
has_bsddb = True
|
||||
except ImportError:
|
||||
has_bsddb = False
|
||||
|
||||
|
||||
if has_bsddb:
    # Constants handed to berkeleydb when creating environments and DBs.

    # argument for db.DBEnv.set_flags
    ENVSETFLAGS = db.DB_CDB_ALLDB
    # flags for db.DBEnv.open
    ENVFLAGS = db.DB_THREAD | db.DB_INIT_CDB | db.DB_INIT_MPOOL
    CACHESIZE = 50 * 1024 * 1024

    # flags for db.DB.open()
    DBOPENFLAGS = db.DB_THREAD
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"BerkeleyDB",
|
||||
"_ToKeyFunc",
|
||||
"_FromKeyFunc",
|
||||
"_GetPrefixFunc",
|
||||
"_ResultsFromKeyFunc",
|
||||
]
|
||||
|
||||
|
||||
_ToKeyFunc = Callable[[Tuple[bytes, bytes, bytes], bytes], bytes]
|
||||
_FromKeyFunc = Callable[[bytes], Tuple[bytes, bytes, bytes, bytes]]
|
||||
_GetPrefixFunc = Callable[
|
||||
[Tuple[str, str, str], Optional[str]], Generator[str, None, None]
|
||||
]
|
||||
_ResultsFromKeyFunc = Callable[
|
||||
[bytes, Optional[Node], Optional[Node], Optional[Node], bytes],
|
||||
Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]],
|
||||
]
|
||||
|
||||
|
||||
class BerkeleyDB(Store):
|
||||
"""\
|
||||
A store that allows for on-disk persistence using BerkeleyDB, a fast
|
||||
key/value DB.
|
||||
|
||||
This store implementation used to be known, previous to rdflib 6.0.0
|
||||
as 'Sleepycat' due to that being the then name of the Python wrapper
|
||||
for BerkeleyDB.
|
||||
|
||||
This store allows for quads as well as triples. See examples of use
|
||||
in both the `examples.berkeleydb_example` and ``test/test_store/test_store_berkeleydb.py``
|
||||
files.
|
||||
|
||||
**NOTE on installation**:
|
||||
|
||||
To use this store, you must have BerkeleyDB installed on your system
|
||||
separately to Python (``brew install berkeley-db`` on a Mac) and also have
|
||||
the BerkeleyDB Python wrapper installed (``pip install berkeleydb``).
|
||||
You may need to install BerkeleyDB Python wrapper like this:
|
||||
``YES_I_HAVE_THE_RIGHT_TO_USE_THIS_BERKELEY_DB_VERSION=1 pip install berkeleydb``
|
||||
"""
|
||||
|
||||
context_aware = True
|
||||
formula_aware = True
|
||||
transaction_aware = False
|
||||
graph_aware = True
|
||||
db_env: db.DBEnv = None
|
||||
|
||||
def __init__(
    self,
    configuration: Optional[str] = None,
    identifier: Optional[Identifier] = None,
):
    """
    Set up the store's initial state and hand ``configuration`` to the
    base ``Store`` initialiser.

    :raises ImportError: if the ``berkeleydb`` module could not be imported
    """
    if not has_bsddb:
        raise ImportError("Unable to import berkeleydb, store is unusable.")

    # these two are set before the base initialiser runs, as in the original
    self.__open = False
    self.__identifier = identifier
    super(BerkeleyDB, self).__init__(configuration)

    pickler = self.node_pickler
    self._loads = pickler.loads
    self._dumps = pickler.dumps
    # declared for type checkers only; populated by open()
    self.__indicies_info: List[Tuple[Any, _ToKeyFunc, _FromKeyFunc]]
|
||||
|
||||
def __get_identifier(self) -> Optional[Identifier]:
    """Return the identifier given at construction or derived in ``open``."""
    current = self.__identifier
    return current

# read-only access to the identifier
identifier = property(__get_identifier)
|
||||
|
||||
def _init_db_environment(
    self, homeDir: str, create: bool = True  # noqa: N803
) -> db.DBEnv:
    """
    Open a ``DBEnv`` rooted at ``homeDir``.

    A missing directory is created when ``create`` is ``True``; otherwise
    ``NO_STORE`` is returned instead of an environment.
    """
    if not exists(homeDir):
        if create is not True:
            return NO_STORE
        mkdir(homeDir)
        # TODO: implement create method and refactor this to it
        self.create(homeDir)

    environment = db.DBEnv()
    environment.set_cachesize(0, CACHESIZE)  # TODO
    # environment.set_lg_max(1024*1024)
    environment.set_flags(ENVSETFLAGS, 1)
    environment.open(homeDir, ENVFLAGS | db.DB_CREATE)
    return environment
|
||||
|
||||
def is_open(self) -> bool:
    """Return the store's open flag."""
    state = self.__open
    return state
|
||||
|
||||
def open(self, path: str, create: bool = True) -> Optional[int]:
    """
    Open (and optionally create) the on-disk store under ``path``.

    Opens the environment, the three triple indices, and the ``contexts``,
    ``namespace``, ``prefix``, ``k2i`` and ``i2k`` databases, builds the
    lookup table used to pick an index for a triple pattern, and starts the
    background sync thread.

    :param path: directory holding the database files
    :param create: create the directory / databases if they are missing
    :return: ``VALID_STORE`` on success, ``NO_STORE`` if berkeleydb is
        unavailable or the environment could not be initialised
    """
    if not has_bsddb:
        return NO_STORE
    homeDir = path  # noqa: N806

    if self.__identifier is None:
        self.__identifier = URIRef(pathname2url(abspath(homeDir)))

    db_env = self._init_db_environment(homeDir, create)
    if db_env == NO_STORE:
        return NO_STORE
    self.db_env = db_env
    self.__open = True

    dbname = None
    dbtype = db.DB_BTREE
    # auto-commit ensures that the open-call commits when transactions
    # are enabled
    dbopenflags = DBOPENFLAGS
    if self.transaction_aware is True:
        dbopenflags |= db.DB_AUTO_COMMIT

    if create:
        dbopenflags |= db.DB_CREATE

    dbmode = 0o660
    dbsetflags = 0

    def open_db(name: str, kind: Any) -> Any:
        # Create one DB handle in the environment and open it under ``name``.
        handle = db.DB(db_env)
        handle.set_flags(dbsetflags)
        handle.open(name, dbname, kind, dbopenflags, dbmode)
        return handle

    def get_prefix_func(start: int, end: int) -> _GetPrefixFunc:
        # Build a generator function yielding the context followed by the
        # triple terms at positions start..end-1 (mod 3) and a final "".
        def get_prefix(
            triple: Tuple[str, str, str], context: Optional[str]
        ) -> Generator[str, None, None]:
            if context is None:
                yield ""
            else:
                yield context
            i = start
            while i < end:
                yield triple[i % 3]
                i += 1
            yield ""

        return get_prefix

    # create and open the DBs
    self.__indicies: List[db.DB] = [
        None,
    ] * 3
    # NOTE on type ignore: this is because type checker does not like this
    # way of initializing, using a temporary variable will solve it.
    # type error: error: List item 0 has incompatible type "None"; expected "Tuple[Any, Callable[[Tuple[bytes, bytes, bytes], bytes], bytes], Callable[[bytes], Tuple[bytes, bytes, bytes, bytes]]]"
    self.__indicies_info = [
        None,  # type: ignore[list-item]
    ] * 3
    for i in range(0, 3):
        # the index file name is the key layout applied to the letters s/p/o/c
        index_name = to_key_func(i)((b"s", b"p", b"o"), b"c").decode()
        index = open_db(index_name, dbtype)
        self.__indicies[i] = index
        self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i))

    lookup: Dict[
        int, Tuple[db.DB, _GetPrefixFunc, _FromKeyFunc, _ResultsFromKeyFunc]
    ] = {}
    for i in range(0, 8):
        # bit k of i is tested for triple position k; for each index find
        # how many consecutive positions, starting at ``start``, are set
        results: List[Tuple[Tuple[int, int], int, int]] = []
        for start in range(0, 3):
            score = 1
            prefix_len = 0
            for j in range(start, start + 3):
                if i & (1 << (j % 3)):
                    score = score << 1
                    prefix_len += 1
                else:
                    break
            tie_break = 2 - start
            results.append(((score, tie_break), start, prefix_len))

        # highest (score, tie_break) wins
        results.sort()
        _, start, prefix_len = results[-1]

        lookup[i] = (
            self.__indicies[start],
            get_prefix_func(start, start + prefix_len),
            from_key_func(start),
            results_from_key_func(start, self._from_string),
        )

    self.__lookup_dict = lookup

    self.__contexts = open_db("contexts", dbtype)
    self.__namespace = open_db("namespace", dbtype)
    self.__prefix = open_db("prefix", dbtype)
    self.__k2i = open_db("k2i", db.DB_HASH)
    self.__i2k = open_db("i2k", db.DB_RECNO)

    self.__needs_sync = False
    # Thread.setDaemon() is deprecated; pass daemon=True instead
    t = Thread(target=self.__sync_run, daemon=True)
    t.start()
    self.__sync_thread = t
    return VALID_STORE
|
||||
|
||||
def __sync_run(self) -> None:
    """
    Body of the background sync thread.

    While the store is open, waits for the needs-sync flag; once set, it
    calls ``sync()`` after more than ``min_seconds`` without a new change
    or more than ``max_seconds`` since the first one. Any exception is
    logged and ends the loop.
    """
    from time import sleep, time

    try:
        min_seconds, max_seconds = 10, 300
        while self.__open:
            if not self.__needs_sync:
                sleep(1)
                continue

            first_change = last_change = time()
            self.__needs_sync = False
            while self.__open:
                sleep(0.1)
                if self.__needs_sync:
                    last_change = time()
                    self.__needs_sync = False
                if (
                    time() - last_change > min_seconds
                    or time() - first_change > max_seconds
                ):
                    self.__needs_sync = False
                    logger.debug("sync")
                    self.sync()
                    break
    except Exception as e:
        logger.exception(e)
|
||||
|
||||
def sync(self) -> None:
    """Call ``sync()`` on every open database; does nothing when closed."""
    if not self.__open:
        return

    for index in self.__indicies:
        index.sync()
    for handle in (
        self.__contexts,
        self.__namespace,
        self.__prefix,
        self.__i2k,
        self.__k2i,
    ):
        handle.sync()
|
||||
|
||||
def close(self, commit_pending_transaction: bool = False) -> None:
    """
    Mark the store closed, wait for the sync thread, then close every
    database and finally the environment.
    """
    self.__open = False
    self.__sync_thread.join()

    for index in self.__indicies:
        index.close()
    for handle in (
        self.__contexts,
        self.__namespace,
        self.__prefix,
        self.__i2k,
        self.__k2i,
    ):
        handle.close()

    self.db_env.close()
|
||||
|
||||
def add(
    self,
    triple: _TripleType,
    context: _ContextType,
    quoted: bool = False,
    txn: Optional[Any] = None,
) -> None:
    """Add a triple to the store of triples.

    The triple is written to the three context-qualified indices. Unless
    ``quoted`` is set, the entries under the empty context are also
    rewritten with the "^"-joined set of contexts holding the triple.
    Nothing is written when the (context, s, p, o) key already exists.
    """
    subject, predicate, object_ = triple
    assert self.__open, "The Store must be open."
    assert context != self, "Can not add triple directly to store"
    Store.add(self, (subject, predicate, object_), context, quoted)

    encode = self._to_string
    s = encode(subject, txn=txn)
    p = encode(predicate, txn=txn)
    o = encode(object_, txn=txn)
    c = encode(context, txn=txn)

    def make_key(ctx, first, second, third):
        # Index keys are four "^"-terminated fields: context + 3 terms.
        return bb("%s^%s^%s^%s^" % (ctx, first, second, third))

    cspo, cpos, cosp = self.__indicies

    if cspo.get(make_key(c, s, p, o), txn=txn) is not None:
        # Triple already present in this context.
        return

    self.__contexts.put(bb(c), b"", txn=txn)

    separator = "^".encode("latin-1")
    stored = cspo.get(make_key("", s, p, o), txn=txn) or "".encode("latin-1")
    contexts = set(stored.split(separator))
    contexts.add(bb(c))
    contexts_value = separator.join(contexts)
    assert contexts_value is not None

    cspo.put(make_key(c, s, p, o), b"", txn=txn)
    cpos.put(make_key(c, p, o, s), b"", txn=txn)
    cosp.put(make_key(c, o, s, p), b"", txn=txn)
    if not quoted:
        cspo.put(make_key("", s, p, o), contexts_value, txn=txn)
        cpos.put(make_key("", p, o, s), contexts_value, txn=txn)
        cosp.put(make_key("", o, s, p), contexts_value, txn=txn)

    self.__needs_sync = True
|
||||
|
||||
def __remove(
    self,
    spo: Tuple[bytes, bytes, bytes],
    c: bytes,
    quoted: bool = False,
    txn: Optional[Any] = None,
) -> None:
    """Delete one encoded triple from context ``c`` in all indices.

    Unless ``quoted`` is set, the entry under the empty context is then
    rewritten with the remaining contexts, or deleted when none remain.
    """
    s, p, o = spo
    empty = "".encode("latin-1")
    separator = "^".encode("latin-1")

    cspo, cpos, cosp = self.__indicies
    stored = cspo.get(separator.join([empty, s, p, o, empty]), txn=txn) or empty
    remaining = set(stored.split(separator))
    remaining.discard(c)
    contexts_value = separator.join(remaining)

    for index, to_key, _ in self.__indicies_info:
        index.delete(to_key((s, p, o), c), txn=txn)

    if quoted:
        return

    for index, to_key, _ in self.__indicies_info:
        if contexts_value:
            index.put(to_key((s, p, o), empty), contexts_value, txn=txn)
        else:
            try:
                index.delete(to_key((s, p, o), empty), txn=txn)
            except db.DBNotFoundError:
                pass  # TODO: is it okay to ignore these?
|
||||
|
||||
# type error: Signature of "remove" incompatible with supertype "Store"
def remove(  # type: ignore[override]
    self,
    spo: _TriplePatternType,
    context: Optional[_ContextType],
    txn: Optional[Any] = None,
) -> None:
    """Remove every triple matching ``spo`` from ``context``.

    A ``None`` entry in ``spo`` is a wildcard. ``context`` of ``None`` (or
    the store itself) removes the match from every context and from the
    entries stored under the empty context.

    A fully bound pattern with a context is removed with a single lookup.
    Any other pattern is resolved by walking the best matching index; a
    fresh cursor is opened for every step, so deleting the current key
    does not affect an open cursor.
    """
    subject, predicate, object_ = spo
    assert self.__open, "The Store must be open."
    Store.remove(self, (subject, predicate, object_), context)
    _to_string = self._to_string

    if context is not None:
        if context == self:
            context = None

    if (
        subject is not None
        and predicate is not None
        and object_ is not None
        and context is not None
    ):
        s = _to_string(subject, txn=txn)
        p = _to_string(predicate, txn=txn)
        o = _to_string(object_, txn=txn)
        c = _to_string(context, txn=txn)
        value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
        if value is not None:
            self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn)
            self.__needs_sync = True
    else:
        index, prefix, from_key, results_from_key = self.__lookup(
            (subject, predicate, object_), context, txn=txn
        )

        cursor = index.cursor(txn=txn)
        try:
            current = cursor.set_range(prefix)
            needs_sync = True
        except db.DBNotFoundError:
            current = None
            needs_sync = False
        finally:
            cursor.close()

        while current:
            key, value = current
            cursor = index.cursor(txn=txn)
            try:
                cursor.set_range(key)
                # ``next`` must be *called*: storing the bound method made
                # the next ``key, value = current`` raise TypeError.
                current = cursor.next()
            except db.DBNotFoundError:
                current = None
            finally:
                cursor.close()

            if not key.startswith(prefix):
                break

            key_c, key_s, key_p, key_o = from_key(key)
            if context is None:
                contexts_value = index.get(key, txn=txn) or "".encode("latin-1")
                # remove triple from all non quoted contexts
                contexts = set(contexts_value.split("^".encode("latin-1")))
                # and from the conjunctive index
                contexts.add("".encode("latin-1"))
                for ctx in contexts:
                    for i, _to_key, _ in self.__indicies_info:
                        i.delete(_to_key((key_s, key_p, key_o), ctx), txn=txn)
            else:
                self.__remove((key_s, key_p, key_o), key_c, txn=txn)

        if context is not None:
            if subject is None and predicate is None and object_ is None:
                # TODO: also if context becomes empty and not just on
                # remove((None, None, None), c)
                try:
                    self.__contexts.delete(
                        bb(_to_string(context, txn=txn)), txn=txn
                    )
                except db.DBNotFoundError:
                    pass

        self.__needs_sync = needs_sync
|
||||
|
||||
def triples(
    self,
    spo: _TriplePatternType,
    context: Optional[_ContextType] = None,
    txn: Optional[Any] = None,
) -> Generator[
    Tuple[_TripleType, Generator[Optional[_ContextType], None, None]],
    None,
    None,
]:
    """A generator over all the triples matching ``spo``.

    ``None`` in the pattern is a wildcard; a ``context`` of ``None`` (or
    the store itself) searches the entries under the empty context. Each
    item is ``((s, p, o), contexts)`` as built by the index's
    ``results_from_key`` function.

    A fresh cursor is opened and closed for every step, so no cursor
    stays open while the generator is suspended.
    """
    assert self.__open, "The Store must be open."

    subject, predicate, object_ = spo

    if context is not None:
        if context == self:
            context = None

    index, prefix, from_key, results_from_key = self.__lookup(
        (subject, predicate, object_), context, txn=txn
    )

    cursor = index.cursor(txn=txn)
    try:
        current = cursor.set_range(prefix)
    except db.DBNotFoundError:
        current = None
    finally:
        cursor.close()

    while current:
        key, value = current
        cursor = index.cursor(txn=txn)
        try:
            cursor.set_range(key)
            # ``next`` must be *called*: storing the bound method made the
            # next ``key, value = current`` raise TypeError.
            current = cursor.next()
        except db.DBNotFoundError:
            current = None
        finally:
            cursor.close()

        if not (key and key.startswith(prefix)):
            break

        contexts_value = index.get(key, txn=txn)
        # type error: Incompatible types in "yield" (actual type "Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]]", expected type "Tuple[Tuple[IdentifiedNode, URIRef, Identifier], Iterator[Optional[Graph]]]")
        # NOTE on type ignore: this is needed because some context is
        # lost in the process of extracting triples from the database.
        yield results_from_key(key, subject, predicate, object_, contexts_value)  # type: ignore[misc]
|
||||
|
||||
def __len__(self, context: Optional[_ContextType] = None) -> int:
    """Count the keys of the first index that start with the context prefix.

    ``context`` of ``None`` (or the store itself) counts the entries
    stored under the empty context.
    """
    assert self.__open, "The Store must be open."
    if context is not None:
        if context == self:
            context = None

    if context is None:
        prefix = "^".encode("latin-1")
    else:
        prefix = bb("%s^" % self._to_string(context))

    index = self.__indicies[0]
    cursor = index.cursor()
    count = 0
    try:
        current = cursor.set_range(prefix)
        while current:
            key, value = current
            if not key.startswith(prefix):
                break
            count += 1
            # ``next`` must be *called*: storing the bound method made the
            # next ``key, value = current`` raise TypeError.
            current = cursor.next()
    finally:
        # Close the cursor even if iteration fails.
        cursor.close()
    return count
|
||||
|
||||
def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None:
    """Bind ``prefix`` to ``namespace`` in both lookup databases.

    With ``override`` the existing bindings of either side are deleted
    and replaced. Without it, an existing binding wins over the given
    value.
    """
    prefix_key = prefix.encode("utf-8")
    namespace_key = namespace.encode("utf-8")
    bound_prefix = self.__prefix.get(namespace_key)
    bound_namespace = self.__namespace.get(prefix_key)

    if not override:
        self.__prefix[bound_namespace or namespace_key] = bound_prefix or prefix_key
        self.__namespace[bound_prefix or prefix_key] = (
            bound_namespace or namespace_key
        )
        return

    if bound_prefix:
        self.__namespace.delete(bound_prefix)
    if bound_namespace:
        self.__prefix.delete(bound_namespace)
    self.__prefix[namespace_key] = prefix_key
    self.__namespace[prefix_key] = namespace_key
|
||||
|
||||
def namespace(self, prefix: str) -> Optional[URIRef]:
    """Return the namespace bound to ``prefix``, or ``None`` if unbound."""
    stored = self.__namespace.get(prefix.encode("utf-8"), None)
    return None if stored is None else URIRef(stored.decode("utf-8"))
|
||||
|
||||
def prefix(self, namespace: URIRef) -> Optional[str]:
    """Return the prefix bound to ``namespace``, or ``None`` if unbound."""
    stored = self.__prefix.get(namespace.encode("utf-8"), None)
    return None if stored is None else stored.decode("utf-8")
|
||||
|
||||
def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]:
    """Yield every ``(prefix, namespace)`` binding.

    All bindings are read and the cursor is closed before the first item
    is yielded, so no cursor stays open while the generator is suspended.
    """
    cursor = self.__namespace.cursor()
    results = []
    try:
        current = cursor.first()
        while current:
            prefix, namespace = current
            results.append((prefix.decode("utf-8"), namespace.decode("utf-8")))
            # ``next`` must be *called*: storing the bound method made the
            # next ``prefix, namespace = current`` raise TypeError.
            current = cursor.next()
    finally:
        cursor.close()
    for prefix, namespace in results:
        yield prefix, URIRef(namespace)
|
||||
|
||||
def contexts(
    self, triple: Optional[_TripleType] = None
) -> Generator[_ContextType, None, None]:
    """Yield the contexts of ``triple``, or all contexts when it is falsy.

    For a triple, the contexts come from the "^"-separated value stored
    under the empty context in the first index. Otherwise the contexts
    database is walked, opening a fresh cursor for every step so no
    cursor stays open while the generator is suspended.
    """
    _from_string = self._from_string
    _to_string = self._to_string
    # NOTE on type errors: context is lost because of how data is loaded
    # from the DB.
    if triple:
        subject, predicate, object_ = triple
        s = _to_string(subject)
        p = _to_string(predicate)
        o = _to_string(object_)
        contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o)))
        if contexts:
            for c in contexts.split("^".encode("latin-1")):
                if c:
                    # type error: Incompatible types in "yield" (actual type "Node", expected type "Graph")
                    yield _from_string(c)  # type: ignore[misc]
    else:
        index = self.__contexts
        cursor = index.cursor()
        try:
            current = cursor.first()
        finally:
            cursor.close()
        while current:
            key, value = current
            context = _from_string(key)
            # type error: Incompatible types in "yield" (actual type "Node", expected type "Graph")
            yield context  # type: ignore[misc]
            cursor = index.cursor()
            try:
                cursor.set_range(key)
                # ``next`` must be *called*: storing the bound method made
                # the next ``key, value = current`` raise TypeError.
                current = cursor.next()
            except db.DBNotFoundError:
                current = None
            finally:
                cursor.close()
|
||||
|
||||
def add_graph(self, graph: Graph) -> None:
    """Record ``graph`` in the contexts database with an empty value."""
    context_key = bb(self._to_string(graph))
    self.__contexts.put(context_key, b"")
|
||||
|
||||
def remove_graph(self, graph: Graph):
    """Remove every triple of ``graph`` by removing the all-wildcard pattern."""
    match_everything = (None, None, None)
    self.remove(match_everything, graph)
|
||||
|
||||
def _from_string(self, i: bytes) -> Node:
    """Decode an index identifier back into a term.

    ``i`` is converted with ``int()`` and used as the key into the i2k
    database; the stored value is passed to ``self._loads``.
    """
    return self._loads(self.__i2k.get(int(i)))
|
||||
|
||||
def _to_string(self, term: Node, txn: Optional[Any] = None) -> str:
    """Return the string identifier for ``term``, allocating one if needed.

    The term is serialised with ``self._dumps`` and looked up in the k2i
    database. Unknown terms are appended to the i2k database and the
    value returned by ``append`` becomes the identifier.
    """
    k = self._dumps(term)
    known = self.__k2i.get(k, txn=txn)
    if known is not None:
        return known.decode()

    # weird behaviour from bsddb not taking a txn as a keyword argument
    # for append
    if self.transaction_aware:
        record = self.__i2k.append(k, txn)
    else:
        record = self.__i2k.append(k)
    identifier = "%s" % record

    self.__k2i.put(k, identifier.encode(), txn=txn)
    return identifier
|
||||
|
||||
def __lookup(
    self,
    spo: _TriplePatternType,
    context: Optional[_ContextType],
    txn: Optional[Any] = None,
) -> Tuple[db.DB, bytes, _FromKeyFunc, _ResultsFromKeyFunc]:
    """Pick the index and key prefix to use for a triple pattern.

    The bound positions form a bit mask (subject=1, predicate=2,
    object=4) that selects an entry of ``__lookup_dict``. Bound terms and
    the context are replaced by their string identifiers before the
    entry's prefix function builds the "^"-joined prefix.

    Returns ``(index, prefix, from_key, results_from_key)``.
    """
    subject, predicate, object_ = spo
    encode = self._to_string

    context_id = None if context is None else encode(context, txn=txn)

    mask = 0
    encoded = []
    for bit, term in ((1, subject), (2, predicate), (4, object_)):
        if term is None:
            encoded.append(None)
        else:
            mask += bit
            encoded.append(encode(term, txn=txn))

    index, prefix_func, from_key, results_from_key = self.__lookup_dict[mask]
    prefix = bb("^".join(prefix_func(tuple(encoded), context_id)))
    return index, prefix, from_key, results_from_key
|
||||
|
||||
|
||||
def to_key_func(i: int) -> _ToKeyFunc:
    """Build the key encoder for the index whose term order is rotated by ``i``.

    The returned function joins the context and the three terms (starting
    at position ``i % 3`` and wrapping around) with "^", plus a trailing
    "^".
    """
    order = (i % 3, (i + 1) % 3, (i + 2) % 3)

    def to_key(triple: Tuple[bytes, bytes, bytes], context: bytes) -> bytes:
        "Takes a string; returns key"
        first, second, third = order
        # The final empty field produces the trailing "^".
        fields = (context, triple[first], triple[second], triple[third], b"")
        return b"^".join(fields)

    return to_key
|
||||
|
||||
|
||||
def from_key_func(i: int) -> _FromKeyFunc:
    """Build the key decoder for the index whose term order is rotated by ``i``.

    The returned function splits a key on "^" and returns
    ``(context, subject, predicate, object)``.
    """
    subject_at = (3 - i + 0) % 3 + 1
    predicate_at = (3 - i + 1) % 3 + 1
    object_at = (3 - i + 2) % 3 + 1

    def from_key(key: bytes) -> Tuple[bytes, bytes, bytes, bytes]:
        "Takes a key; returns string"
        fields = key.split(b"^")
        return fields[0], fields[subject_at], fields[predicate_at], fields[object_at]

    return from_key
|
||||
|
||||
|
||||
def results_from_key_func(
    i: int, from_string: Callable[[bytes], Node]
) -> _ResultsFromKeyFunc:
    """Build the result decoder for the index whose term order is rotated by ``i``.

    The returned function turns an index key plus its stored contexts
    value into ``((s, p, o), contexts)``. Terms already bound in the
    query are reused; only unbound positions are decoded with
    ``from_string``.
    """
    subject_at = (3 - i + 0) % 3 + 1
    predicate_at = (3 - i + 1) % 3 + 1
    object_at = (3 - i + 2) % 3 + 1

    def from_key(
        key: bytes,
        subject: Optional[Node],
        predicate: Optional[Node],
        object: Optional[Node],
        contexts_value: bytes,
    ) -> Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]]:
        "Takes a key and subject, predicate, object; returns tuple for yield"
        fields = key.split(b"^")
        # TODO: i & 1: # dis assemble and/or measure to see which is faster
        # subject is None or i & 1
        s = from_string(fields[subject_at]) if subject is None else subject
        p = from_string(fields[predicate_at]) if predicate is None else predicate
        o = from_string(fields[object_at]) if object is None else object
        # Empty fields in the contexts value are skipped.
        context_terms = (
            from_string(c) for c in contexts_value.split(b"^") if c
        )
        return (s, p, o), context_terms

    return from_key
|
||||
|
||||
|
||||
# TODO: Remove unused
|
||||
def readable_index(i: int) -> str:
    """Render a bound-position bit mask as text, e.g. ``5`` -> ``"s,?,o"``.

    Bit 1 stands for the subject, bit 2 for the predicate and bit 4 for
    the object; positions whose bit is not set are shown as ``?``.
    """
    letters = tuple(
        letter if i & bit else "?"
        for letter, bit in (("s", 1), ("p", 2), ("o", 4))
    )
    return "%s,%s,%s" % letters
|
||||
@@ -0,0 +1,95 @@
|
||||
from threading import Lock
|
||||
|
||||
|
||||
class ResponsibleGenerator:
    """A generator that will help clean up when it is done being used."""

    __slots__ = ["cleanup", "gen"]

    def __init__(self, gen, cleanup):
        # ``gen`` is the wrapped iterator; ``cleanup`` is a zero-argument
        # callable invoked when this wrapper is finalised.
        self.gen = gen
        self.cleanup = cleanup

    def __iter__(self):
        """Return the wrapper itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next item of the wrapped iterator."""
        return next(self.gen)

    def __del__(self):
        """Run the cleanup callback when the wrapper is finalised."""
        self.cleanup()
|
||||
|
||||
|
||||
class ConcurrentStore:
    """Wrap a store so that it can be modified while it is being iterated.

    While at least one ``triples()`` iteration is in progress, ``add`` and
    ``remove`` are queued instead of applied. The queues are flushed to
    the wrapped store when the last active iteration finishes.
    """

    def __init__(self, store):
        self.store = store

        # number of calls to visit still in progress
        self.__visit_count = 0

        # lock for locking down the indices
        self.__lock = Lock()

        # lists for keeping track of added and removed triples while
        # we wait for the lock
        self.__pending_removes = []
        self.__pending_adds = []

    def add(self, triple):
        """Add ``triple`` now, or queue it while a read is in progress."""
        (s, p, o) = triple
        if self.__visit_count == 0:
            self.store.add((s, p, o))
        else:
            self.__pending_adds.append((s, p, o))

    def remove(self, triple):
        """Remove ``triple`` now, or queue it while a read is in progress."""
        (s, p, o) = triple
        if self.__visit_count == 0:
            self.store.remove((s, p, o))
        else:
            self.__pending_removes.append((s, p, o))

    def triples(self, triple):
        """Yield the triples of the wrapped store matching ``triple``.

        Triples queued for removal are skipped, and queued additions that
        match the pattern are yielded after the stored ones.
        """
        (su, pr, ob) = triple
        g = self.store.triples((su, pr, ob))
        pending_removes = self.__pending_removes
        self.__begin_read()
        # The wrapper calls __end_read when it is finalised, i.e. once the
        # loop below has released it.
        for s, p, o in ResponsibleGenerator(g, self.__end_read):
            if (s, p, o) not in pending_removes:
                yield s, p, o

        for s, p, o in self.__pending_adds:
            if (
                (su is None or su == s)
                and (pr is None or pr == p)
                and (ob is None or ob == o)
            ):
                yield s, p, o

    def __len__(self):
        """Return the length reported by the wrapped store."""
        return self.store.__len__()

    def __begin_read(self):
        """Register one more active reader."""
        with self.__lock:
            self.__visit_count = self.__visit_count + 1

    def __end_read(self):
        """Unregister a reader; the last one flushes the queued changes.

        The lock is held through a ``with`` block so it is released even
        when the wrapped store raises. Previously such an error left the
        lock held forever and blocked every later read.
        """
        with self.__lock:
            self.__visit_count = self.__visit_count - 1
            if self.__visit_count != 0:
                return

            pending_removes = self.__pending_removes
            while pending_removes:
                (s, p, o) = pending_removes.pop()
                try:
                    self.store.remove((s, p, o))
                except Exception:
                    # Best effort: a queued removal may target a triple
                    # that is no longer in the store.
                    print(s, p, o, "Not in store to remove")

            pending_adds = self.__pending_adds
            while pending_adds:
                (s, p, o) = pending_adds.pop()
                self.store.add((s, p, o))
|
||||
@@ -0,0 +1,737 @@
|
||||
#
|
||||
#
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Collection,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterator,
|
||||
Mapping,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.store import Store
|
||||
from rdflib.util import _coalesce
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import (
|
||||
Graph,
|
||||
_ContextType,
|
||||
_ObjectType,
|
||||
_PredicateType,
|
||||
_SubjectType,
|
||||
_TriplePatternType,
|
||||
_TripleType,
|
||||
)
|
||||
from rdflib.plugins.sparql.sparql import Query, Update
|
||||
from rdflib.query import Result
|
||||
from rdflib.term import Identifier, URIRef
|
||||
|
||||
__all__ = ["SimpleMemory", "Memory"]
|
||||
|
||||
ANY: None = None
|
||||
|
||||
|
||||
class SimpleMemory(Store):
    """\
    A fast naive in memory implementation of a triple store.

    This triple store uses nested dictionaries to store triples. Each
    triple is stored in three such indices as follows: spo[s][p][o] = 1,
    pos[p][o][s] = 1 and osp[o][s][p] = 1.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """

    def __init__(
        self,
        configuration: Optional[str] = None,
        identifier: Optional[Identifier] = None,
    ):
        super(SimpleMemory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo: Dict[_SubjectType, Dict[_PredicateType, Dict[_ObjectType, int]]] = (
            {}
        )

        # indexed by [predicate][object][subject]
        self.__pos: Dict[_PredicateType, Dict[_ObjectType, Dict[_SubjectType, int]]] = (
            {}
        )

        # indexed by [object][subject][predicate]
        self.__osp: Dict[_ObjectType, Dict[_SubjectType, Dict[_PredicateType, int]]] = (
            {}
        )

        self.__namespace: Dict[str, URIRef] = {}
        self.__prefix: Dict[URIRef, str] = {}

    def add(
        self,
        triple: _TripleType,
        context: _ContextType,
        quoted: bool = False,
    ) -> None:
        """\
        Add a triple to the store of triples.

        ``context`` and ``quoted`` are accepted but not used by this
        store.
        """
        # Add dictionary entries for spo[s][p][o] = 1, pos[p][o][s] = 1 and
        # osp[o][s][p] = 1, creating the nested dictionaries where they do
        # not yet exist. ``setdefault`` replaces the former bare
        # ``except:`` blocks, which also caught unrelated errors such as
        # KeyboardInterrupt.
        subject, predicate, object_ = triple
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[object_] = 1
        self.__pos.setdefault(predicate, {}).setdefault(object_, {})[subject] = 1
        self.__osp.setdefault(object_, {}).setdefault(subject, {})[predicate] = 1

    def remove(
        self,
        triple_pattern: _TriplePatternType,
        context: Optional[_ContextType] = None,
    ) -> None:
        """Remove all triples matching ``triple_pattern`` from every index.

        The matches are collected into a list first because the indices
        are mutated while removing. ``context`` is not used.
        """
        for (subject, predicate, object_), _ in list(self.triples(triple_pattern)):
            del self.__spo[subject][predicate][object_]
            del self.__pos[predicate][object_][subject]
            del self.__osp[object_][subject][predicate]

    def triples(
        self,
        triple_pattern: _TriplePatternType,
        context: Optional[_ContextType] = None,
    ) -> Iterator[Tuple[_TripleType, Iterator[Optional[_ContextType]]]]:
        """A generator over all the triples matching

        ``None`` (``ANY``) in the pattern is a wildcard. The index is
        chosen by which terms are bound: spo when the subject is given,
        pos when only the predicate (and possibly the object) is given,
        osp when only the object is given. Every item is paired with an
        empty context iterator. ``context`` is not used.
        """
        subject, predicate, object_ = triple_pattern
        if subject != ANY:  # subject is given
            predicates = self.__spo.get(subject)
            if predicates is None:  # given subject not found
                return
            if predicate != ANY:  # subject+predicate is given
                objects = predicates.get(predicate)
                if objects is None:  # given predicate not found
                    return
                if object_ != ANY:  # subject+predicate+object is given
                    if object_ in objects:
                        yield (subject, predicate, object_), self.__contexts()
                else:  # subject+predicate is given, object unbound
                    for o in objects:
                        yield (subject, predicate, o), self.__contexts()
            else:  # subject given, predicate unbound
                for p, objects in predicates.items():
                    if object_ != ANY:  # object is given
                        if object_ in objects:
                            yield (subject, p, object_), self.__contexts()
                    else:  # object unbound
                        for o in objects:
                            yield (subject, p, o), self.__contexts()
        elif predicate != ANY:  # predicate is given, subject unbound
            objects = self.__pos.get(predicate)
            if objects is None:  # given predicate not found
                return
            if object_ != ANY:  # predicate+object is given, subject unbound
                for s in objects.get(object_, ()):
                    yield (s, predicate, object_), self.__contexts()
            else:  # predicate is given, object+subject unbound
                for o, subjects in objects.items():
                    for s in subjects:
                        yield (s, predicate, o), self.__contexts()
        elif object_ != ANY:  # object is given, subject+predicate unbound
            for s, predicates in self.__osp.get(object_, {}).items():
                for p in predicates:
                    yield (s, p, object_), self.__contexts()
        else:  # subject+predicate+object unbound
            for s, predicates in self.__spo.items():
                for p, objects in predicates.items():
                    for o in objects:
                        yield (s, p, o), self.__contexts()

    def __len__(self, context: Optional[_ContextType] = None) -> int:
        """Return the number of triples in the store (``context`` is not used)."""
        # @@ optimize
        return sum(1 for _ in self.triples((None, None, None)))

    def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None:
        """Bind ``prefix`` to ``namespace``.

        With ``override`` any existing binding of either side is dropped
        and replaced; otherwise an existing binding wins.
        """
        # should be identical to `Memory.bind`
        bound_namespace = self.__namespace.get(prefix)
        bound_prefix = _coalesce(
            self.__prefix.get(namespace),
            # type error: error: Argument 1 to "get" of "Mapping" has incompatible type "Optional[URIRef]"; expected "URIRef"
            self.__prefix.get(bound_namespace),  # type: ignore[arg-type]
        )
        if override:
            if bound_prefix is not None:
                del self.__namespace[bound_prefix]
            if bound_namespace is not None:
                del self.__prefix[bound_namespace]
            self.__prefix[namespace] = prefix
            self.__namespace[prefix] = namespace
        else:
            # type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
            self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce(  # type: ignore[index]
                bound_prefix, default=prefix
            )
            # type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
            self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce(  # type: ignore[index]
                bound_namespace, default=namespace
            )

    def namespace(self, prefix: str) -> Optional[URIRef]:
        """Return the namespace bound to ``prefix``, or ``None``."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace: URIRef) -> Optional[str]:
        """Return the prefix bound to ``namespace``, or ``None``."""
        return self.__prefix.get(namespace, None)

    def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
        """Yield every ``(prefix, namespace)`` binding."""
        yield from self.__namespace.items()

    def __contexts(self) -> Generator[_ContextType, None, None]:
        """Return an empty generator; this store yields no contexts."""
        # TODO: best way to return empty generator
        # type error: Need type annotation for "c"
        return (c for c in [])  # type: ignore[var-annotated]

    def query(
        self,
        query: Union[Query, str],
        initNs: Mapping[str, Any],  # noqa: N803
        initBindings: Mapping[str, Identifier],  # noqa: N803
        queryGraph: str,  # noqa: N803
        **kwargs: Any,
    ) -> Result:
        """Delegate to the base ``Store.query`` and return its result."""
        return super(SimpleMemory, self).query(
            query, initNs, initBindings, queryGraph, **kwargs
        )

    def update(
        self,
        update: Union[Update, str],
        initNs: Mapping[str, Any],  # noqa: N803
        initBindings: Mapping[str, Identifier],  # noqa: N803
        queryGraph: str,  # noqa: N803
        **kwargs: Any,
    ) -> None:
        """Delegate to the base ``Store.update``."""
        super(SimpleMemory, self).update(
            update, initNs, initBindings, queryGraph, **kwargs
        )
|
||||
|
||||
|
||||
class Memory(Store):
|
||||
"""\
|
||||
An in memory implementation of a triple store.
|
||||
|
||||
Same as SimpleMemory above, but is Context-aware, Graph-aware, and Formula-aware
|
||||
Authors: Ashley Sommer
|
||||
"""
|
||||
|
||||
context_aware = True
|
||||
formula_aware = True
|
||||
graph_aware = True
|
||||
|
||||
def __init__(
    self,
    configuration: Optional[str] = None,
    identifier: Optional[Identifier] = None,
):
    """Create an empty store with its three triple indices and context maps."""
    super(Memory, self).__init__(configuration)
    self.identifier = identifier

    # indexed by [subject][predicate][object]
    self.__spo: Dict[_SubjectType, Dict[_PredicateType, Dict[_ObjectType, int]]] = (
        {}
    )

    # indexed by [predicate][object][subject]
    self.__pos: Dict[_PredicateType, Dict[_ObjectType, Dict[_SubjectType, int]]] = (
        {}
    )

    # indexed by [object][subject][predicate]
    self.__osp: Dict[_ObjectType, Dict[_SubjectType, Dict[_PredicateType, int]]] = (
        {}
    )

    self.__namespace: Dict[str, URIRef] = {}
    self.__prefix: Dict[URIRef, str] = {}
    self.__context_obj_map: Dict[str, Graph] = {}
    self.__tripleContexts: Dict[_TripleType, Dict[Optional[str], bool]] = {}
    # Starts with an empty triple set under the ``None`` key.
    self.__contextTriples: Dict[Optional[str], Set[_TripleType]] = {None: set()}
    # all contexts used in store (unencoded)
    self.__all_contexts: Set[Graph] = set()
    # default context information for triples
    self.__defaultContexts: Optional[Dict[Optional[str], bool]] = None
|
||||
|
||||
def add(
|
||||
self,
|
||||
triple: _TripleType,
|
||||
context: _ContextType,
|
||||
quoted: bool = False,
|
||||
) -> None:
|
||||
"""\
|
||||
Add a triple to the store of triples.
|
||||
"""
|
||||
# add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s]
|
||||
# = 1, creating the nested dictionaries where they do not yet
|
||||
# exits.
|
||||
Store.add(self, triple, context, quoted=quoted)
|
||||
if context is not None:
|
||||
self.__all_contexts.add(context)
|
||||
subject, predicate, object_ = triple
|
||||
|
||||
spo = self.__spo
|
||||
try:
|
||||
po = spo[subject]
|
||||
except LookupError:
|
||||
po = spo[subject] = {}
|
||||
try:
|
||||
o = po[predicate]
|
||||
except LookupError:
|
||||
o = po[predicate] = {}
|
||||
|
||||
try:
|
||||
_ = o[object_]
|
||||
# This cannot be reached if (s, p, o) was not inserted before.
|
||||
triple_exists = True
|
||||
except KeyError:
|
||||
o[object_] = 1
|
||||
triple_exists = False
|
||||
self.__add_triple_context(triple, triple_exists, context, quoted)
|
||||
|
||||
if triple_exists:
|
||||
# No need to insert twice this triple.
|
||||
return
|
||||
|
||||
pos = self.__pos
|
||||
try:
|
||||
os = pos[predicate]
|
||||
except LookupError:
|
||||
os = pos[predicate] = {}
|
||||
try:
|
||||
s = os[object_]
|
||||
except LookupError:
|
||||
s = os[object_] = {}
|
||||
s[subject] = 1
|
||||
|
||||
osp = self.__osp
|
||||
try:
|
||||
sp = osp[object_]
|
||||
except LookupError:
|
||||
sp = osp[object_] = {}
|
||||
try:
|
||||
p = sp[subject]
|
||||
except LookupError:
|
||||
p = sp[subject] = {}
|
||||
p[predicate] = 1
|
||||
|
||||
def remove(
|
||||
self,
|
||||
triple_pattern: _TriplePatternType,
|
||||
context: Optional[_ContextType] = None,
|
||||
) -> None:
|
||||
req_ctx = self.__ctx_to_str(context)
|
||||
for triple, c in self.triples(triple_pattern, context=context):
|
||||
subject, predicate, object_ = triple
|
||||
for ctx in self.__get_context_for_triple(triple):
|
||||
if context is not None and req_ctx != ctx:
|
||||
continue
|
||||
self.__remove_triple_context(triple, ctx)
|
||||
ctxs = self.__get_context_for_triple(triple, skipQuoted=True)
|
||||
if None in ctxs and (context is None or len(ctxs) == 1):
|
||||
# remove from default graph too
|
||||
self.__remove_triple_context(triple, None)
|
||||
if len(self.__get_context_for_triple(triple)) == 0:
|
||||
del self.__spo[subject][predicate][object_]
|
||||
del self.__pos[predicate][object_][subject]
|
||||
del self.__osp[object_][subject][predicate]
|
||||
del self.__tripleContexts[triple]
|
||||
if (
|
||||
req_ctx is not None
|
||||
and req_ctx in self.__contextTriples
|
||||
and len(self.__contextTriples[req_ctx]) == 0
|
||||
):
|
||||
# all triples are removed out of this context
|
||||
# and it's not the default context so delete it
|
||||
del self.__contextTriples[req_ctx]
|
||||
|
||||
if (
|
||||
triple_pattern == (None, None, None)
|
||||
and context in self.__all_contexts
|
||||
and not self.graph_aware
|
||||
):
|
||||
# remove the whole context
|
||||
self.__all_contexts.remove(context)
|
||||
|
||||
def triples(
|
||||
self,
|
||||
triple_pattern: _TriplePatternType,
|
||||
context: Optional[_ContextType] = None,
|
||||
) -> Generator[
|
||||
Tuple[_TripleType, Generator[Optional[_ContextType], None, None]],
|
||||
None,
|
||||
None,
|
||||
]:
|
||||
"""A generator over all the triples matching"""
|
||||
req_ctx = self.__ctx_to_str(context)
|
||||
subject, predicate, object_ = triple_pattern
|
||||
|
||||
# all triples case (no triple parts given as pattern)
|
||||
if subject is None and predicate is None and object_ is None:
|
||||
# Just dump all known triples from the given graph
|
||||
if req_ctx not in self.__contextTriples:
|
||||
return
|
||||
for triple in self.__contextTriples[req_ctx].copy():
|
||||
yield triple, self.__contexts(triple)
|
||||
|
||||
# optimize "triple in graph" case (all parts given)
|
||||
elif subject is not None and predicate is not None and object_ is not None:
|
||||
# type error: Incompatible types in assignment (expression has type "Tuple[Optional[IdentifiedNode], Optional[IdentifiedNode], Optional[Identifier]]", variable has type "Tuple[IdentifiedNode, IdentifiedNode, Identifier]")
|
||||
# NOTE on type error: at this point, all elements of triple_pattern
|
||||
# is not None, so it has the same type as triple
|
||||
triple = triple_pattern # type: ignore[assignment]
|
||||
try:
|
||||
_ = self.__spo[subject][predicate][object_]
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
elif subject is not None: # subject is given
|
||||
spo = self.__spo
|
||||
if subject in spo:
|
||||
subjectDictionary = spo[subject] # noqa: N806
|
||||
if predicate is not None: # subject+predicate is given
|
||||
if predicate in subjectDictionary:
|
||||
if object_ is not None: # subject+predicate+object is given
|
||||
if object_ in subjectDictionary[predicate]:
|
||||
triple = (subject, predicate, object_)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # given object not found
|
||||
pass
|
||||
else: # subject+predicate is given, object unbound
|
||||
for o in list(subjectDictionary[predicate].keys()):
|
||||
triple = (subject, predicate, o)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # given predicate not found
|
||||
pass
|
||||
else: # subject given, predicate unbound
|
||||
for p in list(subjectDictionary.keys()):
|
||||
if object_ is not None: # object is given
|
||||
if object_ in subjectDictionary[p]:
|
||||
triple = (subject, p, object_)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # given object not found
|
||||
pass
|
||||
else: # object unbound
|
||||
for o in list(subjectDictionary[p].keys()):
|
||||
triple = (subject, p, o)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # given subject not found
|
||||
pass
|
||||
elif predicate is not None: # predicate is given, subject unbound
|
||||
pos = self.__pos
|
||||
if predicate in pos:
|
||||
predicateDictionary = pos[predicate] # noqa: N806
|
||||
if object_ is not None: # predicate+object is given, subject unbound
|
||||
if object_ in predicateDictionary:
|
||||
for s in list(predicateDictionary[object_].keys()):
|
||||
triple = (s, predicate, object_)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # given object not found
|
||||
pass
|
||||
else: # predicate is given, object+subject unbound
|
||||
for o in list(predicateDictionary.keys()):
|
||||
for s in list(predicateDictionary[o].keys()):
|
||||
triple = (s, predicate, o)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
elif object_ is not None: # object is given, subject+predicate unbound
|
||||
osp = self.__osp
|
||||
if object_ in osp:
|
||||
objectDictionary = osp[object_] # noqa: N806
|
||||
for s in list(objectDictionary.keys()):
|
||||
for p in list(objectDictionary[s].keys()):
|
||||
triple = (s, p, object_)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
else: # subject+predicate+object unbound
|
||||
# Shouldn't get here if all other cases above worked correctly.
|
||||
spo = self.__spo
|
||||
for s in list(spo.keys()):
|
||||
subjectDictionary = spo[s] # noqa: N806
|
||||
for p in list(subjectDictionary.keys()):
|
||||
for o in list(subjectDictionary[p].keys()):
|
||||
triple = (s, p, o)
|
||||
if self.__triple_has_context(triple, req_ctx):
|
||||
yield triple, self.__contexts(triple)
|
||||
|
||||
def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None:
|
||||
# should be identical to `SimpleMemory.bind`
|
||||
bound_namespace = self.__namespace.get(prefix)
|
||||
bound_prefix = _coalesce(
|
||||
self.__prefix.get(namespace),
|
||||
# type error: error: Argument 1 to "get" of "Mapping" has incompatible type "Optional[URIRef]"; expected "URIRef"
|
||||
self.__prefix.get(bound_namespace), # type: ignore[arg-type]
|
||||
)
|
||||
if override:
|
||||
if bound_prefix is not None:
|
||||
del self.__namespace[bound_prefix]
|
||||
if bound_namespace is not None:
|
||||
del self.__prefix[bound_namespace]
|
||||
self.__prefix[namespace] = prefix
|
||||
self.__namespace[prefix] = namespace
|
||||
else:
|
||||
# type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
|
||||
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index]
|
||||
bound_prefix, default=prefix
|
||||
)
|
||||
# type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
|
||||
# type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef")
|
||||
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index]
|
||||
bound_namespace, default=namespace
|
||||
)
|
||||
|
||||
def namespace(self, prefix: str) -> Optional[URIRef]:
|
||||
return self.__namespace.get(prefix, None)
|
||||
|
||||
def prefix(self, namespace: URIRef) -> Optional[str]:
|
||||
return self.__prefix.get(namespace, None)
|
||||
|
||||
def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
|
||||
for prefix, namespace in self.__namespace.items():
|
||||
yield prefix, namespace
|
||||
|
||||
def contexts(
|
||||
self, triple: Optional[_TripleType] = None
|
||||
) -> Generator[_ContextType, None, None]:
|
||||
if triple is None or triple == (None, None, None):
|
||||
return (context for context in self.__all_contexts)
|
||||
|
||||
subj, pred, obj = triple
|
||||
try:
|
||||
_ = self.__spo[subj][pred][obj]
|
||||
return self.__contexts(triple)
|
||||
except KeyError:
|
||||
return (_ for _ in [])
|
||||
|
||||
def __len__(self, context: Optional[_ContextType] = None) -> int:
|
||||
ctx = self.__ctx_to_str(context)
|
||||
if ctx not in self.__contextTriples:
|
||||
return 0
|
||||
return len(self.__contextTriples[ctx])
|
||||
|
||||
def add_graph(self, graph: Graph) -> None:
|
||||
if not self.graph_aware:
|
||||
Store.add_graph(self, graph)
|
||||
else:
|
||||
self.__all_contexts.add(graph)
|
||||
|
||||
def remove_graph(self, graph: Graph) -> None:
|
||||
if not self.graph_aware:
|
||||
Store.remove_graph(self, graph)
|
||||
else:
|
||||
self.remove((None, None, None), graph)
|
||||
try:
|
||||
self.__all_contexts.remove(graph)
|
||||
except KeyError:
|
||||
pass # we didn't know this graph, no problem
|
||||
|
||||
# internal utility methods below
|
||||
def __add_triple_context(
|
||||
self,
|
||||
triple: _TripleType,
|
||||
triple_exists: bool,
|
||||
context: Optional[_ContextType],
|
||||
quoted: bool,
|
||||
) -> None:
|
||||
"""add the given context to the set of contexts for the triple"""
|
||||
ctx = self.__ctx_to_str(context)
|
||||
quoted = bool(quoted)
|
||||
if triple_exists:
|
||||
# we know the triple exists somewhere in the store
|
||||
try:
|
||||
triple_context = self.__tripleContexts[triple]
|
||||
except KeyError:
|
||||
# triple exists with default ctx info
|
||||
# start with a copy of the default ctx info
|
||||
# type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "copy"
|
||||
triple_context = self.__tripleContexts[triple] = (
|
||||
self.__defaultContexts.copy() # type: ignore[union-attr]
|
||||
)
|
||||
|
||||
triple_context[ctx] = quoted
|
||||
|
||||
if not quoted:
|
||||
triple_context[None] = quoted
|
||||
|
||||
else:
|
||||
# the triple didn't exist before in the store
|
||||
if quoted: # this context only
|
||||
triple_context = self.__tripleContexts[triple] = {ctx: quoted}
|
||||
else: # default context as well
|
||||
triple_context = self.__tripleContexts[triple] = {
|
||||
ctx: quoted,
|
||||
None: quoted,
|
||||
}
|
||||
|
||||
# if the triple is not quoted add it to the default context
|
||||
if not quoted:
|
||||
self.__contextTriples[None].add(triple)
|
||||
|
||||
# always add the triple to given context, making sure it's initialized
|
||||
if ctx not in self.__contextTriples:
|
||||
self.__contextTriples[ctx] = set()
|
||||
self.__contextTriples[ctx].add(triple)
|
||||
|
||||
# if this is the first ever triple in the store, set default ctx info
|
||||
if self.__defaultContexts is None:
|
||||
self.__defaultContexts = triple_context
|
||||
# if the context info is the same as default, no need to store it
|
||||
if triple_context == self.__defaultContexts:
|
||||
del self.__tripleContexts[triple]
|
||||
|
||||
def __get_context_for_triple(
|
||||
self, triple: _TripleType, skipQuoted: bool = False # noqa: N803
|
||||
) -> Collection[Optional[str]]:
|
||||
"""return a list of contexts (str) for the triple, skipping
|
||||
quoted contexts if skipQuoted==True"""
|
||||
|
||||
ctxs = self.__tripleContexts.get(triple, self.__defaultContexts)
|
||||
|
||||
if not skipQuoted:
|
||||
# type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "keys"
|
||||
return ctxs.keys() # type: ignore[union-attr]
|
||||
|
||||
# type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "items"
|
||||
return [ctx for ctx, quoted in ctxs.items() if not quoted] # type: ignore[union-attr]
|
||||
|
||||
def __triple_has_context(self, triple: _TripleType, ctx: Optional[str]) -> bool:
|
||||
"""return True if the triple exists in the given context"""
|
||||
# type error: Unsupported right operand type for in ("Optional[Dict[Optional[str], bool]]")
|
||||
return ctx in self.__tripleContexts.get(triple, self.__defaultContexts) # type: ignore[operator]
|
||||
|
||||
def __remove_triple_context(self, triple: _TripleType, ctx):
|
||||
"""remove the context from the triple"""
|
||||
# type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "copy"
|
||||
ctxs = self.__tripleContexts.get(triple, self.__defaultContexts).copy() # type: ignore[union-attr]
|
||||
del ctxs[ctx]
|
||||
if ctxs == self.__defaultContexts:
|
||||
del self.__tripleContexts[triple]
|
||||
else:
|
||||
self.__tripleContexts[triple] = ctxs
|
||||
self.__contextTriples[ctx].remove(triple)
|
||||
|
||||
@overload
|
||||
def __ctx_to_str(self, ctx: _ContextType) -> str: ...
|
||||
|
||||
@overload
|
||||
def __ctx_to_str(self, ctx: None) -> None: ...
|
||||
|
||||
def __ctx_to_str(self, ctx: Optional[_ContextType]) -> Optional[str]:
|
||||
if ctx is None:
|
||||
return None
|
||||
try:
|
||||
# ctx could be a graph. In that case, use its identifier
|
||||
ctx_str = "{}:{}".format(ctx.identifier.__class__.__name__, ctx.identifier)
|
||||
self.__context_obj_map[ctx_str] = ctx
|
||||
return ctx_str
|
||||
except AttributeError:
|
||||
# otherwise, ctx should be a URIRef or BNode or str
|
||||
# NOTE on type errors: This is actually never called with ctx value as str in all unit tests, so this seems like it should just not be here.
|
||||
# type error: Subclass of "Graph" and "str" cannot exist: would have incompatible method signatures
|
||||
if isinstance(ctx, str): # type: ignore[unreachable]
|
||||
# type error: Statement is unreachable
|
||||
ctx_str = "{}:{}".format(ctx.__class__.__name__, ctx) # type: ignore[unreachable]
|
||||
if ctx_str in self.__context_obj_map:
|
||||
return ctx_str
|
||||
self.__context_obj_map[ctx_str] = ctx
|
||||
return ctx_str
|
||||
raise RuntimeError("Cannot use that type of object as a Graph context")
|
||||
|
||||
def __contexts(self, triple: _TripleType) -> Generator[_ContextType, None, None]:
|
||||
"""return a generator for all the non-quoted contexts
|
||||
(dereferenced) the encoded triple appears in"""
|
||||
# type error: Argument 2 to "get" of "Mapping" has incompatible type "str"; expected "Optional[Graph]"
|
||||
return (
|
||||
self.__context_obj_map.get(ctx_str, ctx_str) # type: ignore[arg-type]
|
||||
for ctx_str in self.__get_context_for_triple(triple, skipQuoted=True)
|
||||
if ctx_str is not None
|
||||
)
|
||||
|
||||
# type error: Missing return statement
|
||||
def query( # type: ignore[return]
|
||||
self,
|
||||
query: Union[Query, str],
|
||||
initNs: Mapping[str, Any], # noqa: N803
|
||||
initBindings: Mapping[str, Identifier], # noqa: N803
|
||||
queryGraph: str, # noqa: N803
|
||||
**kwargs,
|
||||
) -> Result:
|
||||
super(Memory, self).query(query, initNs, initBindings, queryGraph, **kwargs)
|
||||
|
||||
def update(
|
||||
self,
|
||||
update: Union[Update, Any],
|
||||
initNs: Mapping[str, Any], # noqa: N803
|
||||
initBindings: Mapping[str, Identifier], # noqa: N803
|
||||
queryGraph: str, # noqa: N803
|
||||
**kwargs,
|
||||
) -> None:
|
||||
super(Memory, self).update(update, initNs, initBindings, queryGraph, **kwargs)
|
||||
+174
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
This wrapper intercepts calls through the store interface which make use of
|
||||
the REGEXTerm class to represent matches by REGEX instead of literal
|
||||
comparison.
|
||||
|
||||
Implemented for stores that don't support this and essentially
|
||||
provides the support by replacing the REGEXTerms by wildcards (None) and
|
||||
matching against the results from the store it's wrapping.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.store import Store
|
||||
|
||||
# Store is capable of doing its own REGEX matching
|
||||
NATIVE_REGEX = 0
|
||||
# Store uses Python's re module internally for REGEX matching
|
||||
PYTHON_REGEX = 1
|
||||
|
||||
|
||||
class REGEXTerm(str):
    """
    REGEXTerm can be used in any term slot and is interpreted as a request to
    perform a REGEX match (not a string comparison) using the value
    (pre-compiled) for checking matches.

    The string value of the term is the pattern text itself; the compiled
    pattern is available as ``compiledExpr``.
    """

    def __init__(self, expr):
        # str.__new__ has already stored ``expr`` as the string value; keep the
        # compiled pattern next to it so matching does not recompile each time.
        self.compiledExpr = re.compile(expr)

    def __reduce__(self):
        # Rebuild from the pattern text so that a pickled or copied term keeps
        # its expression (compiledExpr is recreated by __init__).
        return (REGEXTerm, (str(self),))
|
||||
|
||||
|
||||
def regexCompareQuad(quad, regexQuad):  # noqa: N802, N803
    """Check the first four slots of ``quad`` against ``regexQuad``.

    Only slots of ``regexQuad`` holding a ``REGEXTerm`` are tested; a slot
    fails when its compiled pattern does not match the corresponding ``quad``
    value. Returns ``False`` on the first failing slot, ``True`` otherwise.
    """
    for position in (0, 1, 2, 3):
        pattern = regexQuad[position]
        if not isinstance(pattern, REGEXTerm):
            continue
        if not pattern.compiledExpr.match(quad[position]):
            return False
    return True
|
||||
|
||||
|
||||
class REGEXMatching(Store):
    """Store wrapper adding ``REGEXTerm`` support to the wrapped ``storage``.

    ``REGEXTerm`` slots in a pattern are replaced by wildcards (``None``)
    before the wrapped store is queried, and the results are then filtered
    against the compiled expressions. All other operations are forwarded to
    the wrapped store.
    """

    def __init__(self, storage):
        self.storage = storage
        self.context_aware = storage.context_aware
        # NOTE: this store can't be formula_aware as it doesn't have enough
        # info to reverse the removal of a quoted statement.
        self.formula_aware = storage.formula_aware
        self.transaction_aware = storage.transaction_aware

    def open(self, configuration, create=True):
        return self.storage.open(configuration, create)

    def close(self, commit_pending_transaction=False):
        # Forward the flag instead of silently dropping it.
        self.storage.close(commit_pending_transaction=commit_pending_transaction)

    def destroy(self, configuration):
        self.storage.destroy(configuration)

    def add(self, triple, context, quoted=False):
        (subject, predicate, object_) = triple
        self.storage.add((subject, predicate, object_), context, quoted)

    def remove(self, triple, context=None):
        """Remove matching triples; ``REGEXTerm`` slots are matched by regex."""
        (subject, predicate, object_) = triple
        ctx_is_regex = context is not None and isinstance(
            context.identifier, REGEXTerm
        )
        if not (
            isinstance(subject, REGEXTerm)
            or isinstance(predicate, REGEXTerm)
            or isinstance(object_, REGEXTerm)
            or ctx_is_regex
        ):
            self.storage.remove((subject, predicate, object_), context)
            return

        # One or more of the terms is a REGEX expression, so we must
        # replace it / them with wildcard(s) and match after we query.
        # Explicit conditionals: a falsy concrete term (e.g. an empty-string
        # literal) or an empty context must NOT be widened to a wildcard.
        s = None if isinstance(subject, REGEXTerm) else subject
        p = None if isinstance(predicate, REGEXTerm) else predicate
        o = None if isinstance(object_, REGEXTerm) else object_
        c = None if ctx_is_regex else context

        regex_quad = (
            subject,
            predicate,
            object_,
            None if context is None else context.identifier,
        )

        # Collect first, remove afterwards, so the wrapped store is not
        # mutated while its result generator is being consumed.
        removeQuadList = []  # noqa: N806
        for (s1, p1, o1), cg in self.storage.triples((s, p, o), c):
            for ctx in cg:
                ctx_id = ctx.identifier
                if regexCompareQuad((s1, p1, o1, ctx_id), regex_quad):
                    removeQuadList.append((s1, p1, o1, ctx_id))
        for rs, rp, ro, rc in removeQuadList:
            self.storage.remove(
                (rs, rp, ro), Graph(self, rc) if rc is not None else None
            )

    def triples(self, triple, context=None):
        """Yield ``(triple, contexts)`` pairs; ``REGEXTerm`` slots are matched by regex."""
        (subject, predicate, object_) = triple
        ctx_is_regex = context is not None and isinstance(
            context.identifier, REGEXTerm
        )
        if not (
            isinstance(subject, REGEXTerm)
            or isinstance(predicate, REGEXTerm)
            or isinstance(object_, REGEXTerm)
            or ctx_is_regex
        ):
            for (s1, p1, o1), cg in self.storage.triples(
                (subject, predicate, object_), context
            ):
                yield (s1, p1, o1), cg
            return

        # One or more of the terms is a REGEX expression, so we must
        # replace it / them with wildcard(s) and match after we query.
        s = None if isinstance(subject, REGEXTerm) else subject
        p = None if isinstance(predicate, REGEXTerm) else predicate
        o = None if isinstance(object_, REGEXTerm) else object_
        c = None if ctx_is_regex else context

        for (s1, p1, o1), cg in self.storage.triples((s, p, o), c):
            matchingCtxs = []  # noqa: N806
            for ctx in cg:
                if c is None:
                    # c is None here only when no context was given or the
                    # context identifier is a REGEXTerm.
                    if context is None or context.identifier.compiledExpr.match(
                        ctx.identifier
                    ):
                        matchingCtxs.append(ctx)
                else:
                    matchingCtxs.append(ctx)
            if matchingCtxs and regexCompareQuad(
                (s1, p1, o1, None), (subject, predicate, object_, None)
            ):
                yield (s1, p1, o1), (matched for matched in matchingCtxs)

    def __len__(self, context=None):
        # NOTE: If the context is a REGEX this could be an expensive
        # proposition
        return self.storage.__len__(context)

    def contexts(self, triple=None):
        # NOTE: There is no way to control REGEX matching for this method at
        # this level as it only returns the contexts, not the matching
        # triples.
        yield from self.storage.contexts(triple)

    def remove_context(self, identifier):
        self.storage.remove((None, None, None), identifier)

    def bind(self, prefix, namespace, override=True):
        self.storage.bind(prefix, namespace, override=override)

    def prefix(self, namespace):
        return self.storage.prefix(namespace)

    def namespace(self, prefix):
        return self.storage.namespace(prefix)

    def namespaces(self):
        return self.storage.namespaces()

    def commit(self):
        self.storage.commit()

    def rollback(self):
        self.storage.rollback()
|
||||
+192
@@ -0,0 +1,192 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import copy
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import TYPE_CHECKING, Optional, Tuple
|
||||
from urllib.error import HTTPError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from rdflib.query import Result
|
||||
from rdflib.term import BNode
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te
|
||||
|
||||
|
||||
class SPARQLConnectorException(Exception):  # noqa: N818
    """Error raised by SPARQLConnector for invalid settings (bad auth tuple,
    unsupported HTTP method, missing endpoint)."""
|
||||
|
||||
|
||||
# TODO: Pull in these from the result implementation plugins?
|
||||
_response_mime_types = {
|
||||
"xml": "application/sparql-results+xml, application/rdf+xml",
|
||||
"json": "application/sparql-results+json",
|
||||
"csv": "text/csv",
|
||||
"tsv": "text/tab-separated-values",
|
||||
"application/rdf+xml": "application/rdf+xml",
|
||||
}
|
||||
|
||||
|
||||
class SPARQLConnector:
    """
    this class deals with nitty gritty details of talking to a SPARQL server
    """

    def __init__(
        self,
        query_endpoint: Optional[str] = None,
        update_endpoint: Optional[str] = None,
        returnFormat: str = "xml",  # noqa: N803
        method: te.Literal["GET", "POST", "POST_FORM"] = "GET",
        auth: Optional[Tuple[str, str]] = None,
        **kwargs,
    ):
        """
        auth, if present, must be a tuple of (username, password) used for Basic Authentication

        Any additional keyword arguments are stored in ``self.kwargs``; their
        ``params`` and ``headers`` entries are merged into every request.

        :raises SPARQLConnectorException: if ``method`` is not one of
            "GET", "POST", "POST_FORM", or ``auth`` is not a 2-tuple.
        """
        self._method: str
        self.returnFormat = returnFormat
        self.query_endpoint = query_endpoint
        self.update_endpoint = update_endpoint
        self.kwargs = kwargs
        self.method = method
        if auth is not None:
            if not isinstance(auth, tuple):
                raise SPARQLConnectorException("auth must be a tuple")
            if len(auth) != 2:
                raise SPARQLConnectorException("auth must be a tuple (user, password)")
            # UTF-8 (as urllib's own basic-auth handler uses) so that
            # non-ASCII credentials do not raise UnicodeEncodeError.
            credentials = ("%s:%s" % auth).encode("utf-8")
            token = base64.b64encode(credentials).decode("ascii")
            self.kwargs.setdefault("headers", {})
            self.kwargs["headers"].update({"Authorization": "Basic %s" % token})

    @property
    def method(self) -> str:
        """HTTP method used by ``query``: "GET", "POST" or "POST_FORM"."""
        return self._method

    @method.setter
    def method(self, method: str) -> None:
        if method not in ("GET", "POST", "POST_FORM"):
            raise SPARQLConnectorException(
                'Method must be "GET", "POST", or "POST_FORM"'
            )

        self._method = method

    def query(
        self,
        query: str,
        default_graph: Optional[str] = None,
        named_graph: Optional[str] = None,
    ) -> Result:
        """Send ``query`` to the query endpoint and parse the response.

        :param query: the SPARQL query text.
        :param default_graph: sent as ``default-graph-uri`` unless it is a BNode.
        :param named_graph: accepted but not used by this method.
        :raises SPARQLConnectorException: if no query endpoint is configured.
        :raises ValueError: if a GET request fails for any reason.

        NOTE: with "POST" and "POST_FORM" an ``HTTPError`` is not raised;
        a ``(code, message, None)`` tuple is returned instead of a ``Result``.
        """
        if not self.query_endpoint:
            raise SPARQLConnectorException("Query endpoint not set!")

        params = {}
        # this test ensures we don't have a useless (BNode) default graph URI, which calls to Graph().query() will add
        if default_graph is not None and not isinstance(default_graph, BNode):
            params["default-graph-uri"] = default_graph

        headers = {"Accept": _response_mime_types[self.returnFormat]}

        # deep copy so per-request changes never leak into self.kwargs
        args = copy.deepcopy(self.kwargs)

        # merge params/headers dicts
        args.setdefault("params", {})

        args.setdefault("headers", {})
        args["headers"].update(headers)

        if self.method == "GET":
            params["query"] = query
            args["params"].update(params)
            qsa = "?" + urlencode(args["params"])
            try:
                res = urlopen(
                    Request(self.query_endpoint + qsa, headers=args["headers"])
                )
            except Exception as e:
                # keep the original failure attached as __cause__
                raise ValueError(
                    "You did something wrong formulating either the URI or your SPARQL query"
                ) from e
        elif self.method == "POST":
            args["headers"].update({"Content-Type": "application/sparql-query"})
            args["params"].update(params)
            qsa = "?" + urlencode(args["params"])
            try:
                res = urlopen(
                    Request(
                        self.query_endpoint + qsa,
                        data=query.encode(),
                        headers=args["headers"],
                    )
                )
            except HTTPError as e:
                # type error: Incompatible return value type (got "Tuple[int, str, None]", expected "Result")
                return e.code, str(e), None  # type: ignore[return-value]
        elif self.method == "POST_FORM":
            params["query"] = query
            args["params"].update(params)
            try:
                res = urlopen(
                    Request(
                        self.query_endpoint,
                        data=urlencode(args["params"]).encode(),
                        headers=args["headers"],
                    )
                )
            except HTTPError as e:
                # type error: Incompatible return value type (got "Tuple[int, str, None]", expected "Result")
                return e.code, str(e), None  # type: ignore[return-value]
        else:
            raise SPARQLConnectorException("Unknown method %s" % self.method)

        # Read everything, then release the connection before parsing.
        with res:
            payload = res.read()
            content_type = res.headers["Content-Type"].split(";")[0]
        return Result.parse(BytesIO(payload), content_type=content_type)

    def update(
        self,
        query: str,
        default_graph: Optional[str] = None,
        named_graph: Optional[str] = None,
    ) -> None:
        """Send the SPARQL update ``query`` to the update endpoint.

        :param default_graph: sent as ``using-graph-uri`` when given.
        :param named_graph: sent as ``using-named-graph-uri`` when given.
        :raises SPARQLConnectorException: if no update endpoint is configured.
        """
        if not self.update_endpoint:
            raise SPARQLConnectorException("Update endpoint not set!")

        params = {}

        if default_graph is not None:
            params["using-graph-uri"] = default_graph

        if named_graph is not None:
            params["using-named-graph-uri"] = named_graph

        headers = {
            "Accept": _response_mime_types[self.returnFormat],
            "Content-Type": "application/sparql-update; charset=UTF-8",
        }

        args = copy.deepcopy(self.kwargs)  # other QSAs

        args.setdefault("params", {})
        args["params"].update(params)
        args.setdefault("headers", {})
        args["headers"].update(headers)

        qsa = "?" + urlencode(args["params"])
        request = Request(
            self.update_endpoint + qsa, data=query.encode(), headers=args["headers"]
        )
        # The response body is not used; just make sure the connection is closed.
        with urlopen(request):
            pass
|
||||
|
||||
|
||||
__all__ = ["SPARQLConnector", "SPARQLConnectorException"]
|
||||
@@ -0,0 +1,999 @@
|
||||
"""
|
||||
This is an RDFLib store around Ivan Herman et al.'s SPARQL service wrapper.
|
||||
This was first done in layer-cake, and then ported to RDFLib
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph
|
||||
from rdflib.plugins.stores.regexmatching import NATIVE_REGEX
|
||||
from rdflib.store import Store
|
||||
from rdflib.term import BNode, Identifier, Node, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import typing_extensions as te # noqa: I001
|
||||
from rdflib.graph import (
|
||||
_TripleType,
|
||||
_ContextType,
|
||||
_QuadType,
|
||||
_TriplePatternType,
|
||||
_SubjectType,
|
||||
_PredicateType,
|
||||
_ObjectType,
|
||||
_ContextIdentifierType,
|
||||
)
|
||||
from rdflib.plugins.sparql.sparql import Query, Update
|
||||
from rdflib.query import Result, ResultRow
|
||||
|
||||
from .sparqlconnector import SPARQLConnector
|
||||
|
||||
# Defines some SPARQL keywords
|
||||
LIMIT = "LIMIT"
|
||||
OFFSET = "OFFSET"
|
||||
ORDERBY = "ORDER BY"
|
||||
|
||||
BNODE_IDENT_PATTERN = re.compile(r"(?P<label>_\:[^\s]+)")
|
||||
|
||||
_NodeToSparql = Callable[["Node"], str]
|
||||
|
||||
|
||||
def _node_to_sparql(node: Node) -> str:
    """Return the N3 form of ``node`` for use in SPARQL; blank nodes are rejected.

    :raises Exception: if ``node`` is a ``BNode``.
    """
    if not isinstance(node, BNode):
        return node.n3()
    raise Exception(
        "SPARQLStore does not support BNodes! "
        "See http://www.w3.org/TR/sparql11-query/#BGPsparqlBNodes"
    )
|
||||
|
||||
|
||||
class SPARQLStore(SPARQLConnector, Store):
|
||||
"""An RDFLib store around a SPARQL endpoint
|
||||
|
||||
This is context-aware and should work as expected
|
||||
when a context is specified.
|
||||
|
||||
For ConjunctiveGraphs, reading is done from the "default graph". Exactly
|
||||
what this means depends on your endpoint, because SPARQL does not offer a
|
||||
simple way to query the union of all graphs as it would be expected for a
|
||||
ConjunctiveGraph. This is why we recommend using Dataset instead, which is
|
||||
motivated by the SPARQL 1.1.
|
||||
|
||||
Fuseki/TDB has a flag for specifying that the default graph
|
||||
is the union of all graphs (``tdb:unionDefaultGraph`` in the Fuseki config).
|
||||
|
||||
.. warning:: By default the SPARQL Store does not support blank-nodes!
|
||||
|
||||
As blank-nodes act as variables in SPARQL queries,
|
||||
there is no way to query for a particular blank node without
|
||||
using non-standard SPARQL extensions.
|
||||
|
||||
See http://www.w3.org/TR/sparql11-query/#BGPsparqlBNodes
|
||||
|
||||
You can make use of such extensions through the ``node_to_sparql``
|
||||
argument. For example if you want to transform BNode('0001') into
|
||||
"<bnode:b0001>", you can use a function like this:
|
||||
|
||||
>>> def my_bnode_ext(node):
|
||||
... if isinstance(node, BNode):
|
||||
... return '<bnode:b%s>' % node
|
||||
... return _node_to_sparql(node)
|
||||
>>> store = SPARQLStore('http://dbpedia.org/sparql',
|
||||
... node_to_sparql=my_bnode_ext)
|
||||
|
||||
You can request a particular result serialization with the
|
||||
``returnFormat`` parameter. This is a string that must have a
|
||||
matching plugin registered. Built in is support for ``xml``,
|
||||
``json``, ``csv``, ``tsv`` and ``application/rdf+xml``.
|
||||
|
||||
The underlying SPARQLConnector uses the urllib library.
|
||||
Any extra kwargs passed to the SPARQLStore connector are passed to
|
||||
urllib when doing HTTP calls. I.e. you have full control of
|
||||
cookies/auth/headers.
|
||||
|
||||
For example:
|
||||
|
||||
>>> store = SPARQLStore('...my endpoint ...', auth=('user','pass'))
|
||||
|
||||
will use HTTP basic auth.
|
||||
|
||||
"""
|
||||
|
||||
formula_aware = False
|
||||
transaction_aware = False
|
||||
graph_aware = True
|
||||
regex_matching = NATIVE_REGEX
|
||||
|
||||
def __init__(
    self,
    query_endpoint: Optional[str] = None,
    sparql11: bool = True,
    context_aware: bool = True,
    node_to_sparql: _NodeToSparql = _node_to_sparql,
    returnFormat: str = "xml",  # noqa: N803
    auth: Optional[Tuple[str, str]] = None,
    **sparqlconnector_kwargs,
):
    """Set up the connector part and the per-store state.

    ``query_endpoint``, ``returnFormat``, ``auth`` and any extra keyword
    arguments are forwarded to the parent initializer; the remaining
    arguments are kept on the instance.
    """
    parent = super(SPARQLStore, self)
    parent.__init__(
        query_endpoint=query_endpoint,
        returnFormat=returnFormat,
        auth=auth,
        **sparqlconnector_kwargs,
    )

    # Callable used to render every node placed into a generated query.
    self.node_to_sparql = node_to_sparql
    # prefix -> namespace bindings managed through bind()/prefix()/namespace().
    self.nsBindings: Dict[str, Any] = {}
    self.sparql11 = sparql11
    # Both flags follow the single ``context_aware`` argument.
    self.context_aware = context_aware
    self.graph_aware = context_aware
    # Number of requests issued through _query().
    self._queries = 0
|
||||
|
||||
# type error: Missing return statement
|
||||
def open(self, configuration: str, create: bool = False) -> Optional[int]:  # type: ignore[return]
    """Set the query endpoint from ``configuration``.

    This method is included so that calls to this Store via Graph, e.g.
    ``Graph("SPARQLStore")``, can set the required parameters.

    :param configuration: a single query endpoint URI. Any ``str`` instance
        is accepted, including ``str`` subclasses.
    :param create: accepted for interface compatibility; not used here.
    :raises Exception: if ``configuration`` is not a string.
    """
    # isinstance() rather than an exact type comparison, so that str
    # subclasses are not rejected.
    if not isinstance(configuration, str):
        raise Exception(
            "configuration must be a string (a single query endpoint URI)"
        )
    self.query_endpoint = configuration
|
||||
|
||||
# Database Management Methods
|
||||
def create(self, configuration: str) -> None:
    """Always raise ``TypeError``: this store is read only."""
    message = "The SPARQL Store is read only. Try SPARQLUpdateStore for read/write."
    raise TypeError(message)
|
||||
|
||||
def destroy(self, configuration: str) -> None:
    """Always raise ``TypeError``: this store is read only."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
# Transactional interfaces
|
||||
def commit(self) -> None:
    """Always raise ``TypeError``: this store is read only."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
def rollback(self) -> None:
    """Always raise ``TypeError``: this store is read only."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
def add(
    self, _: _TripleType, context: _ContextType = None, quoted: bool = False
) -> None:
    """Always raise ``TypeError``: triples cannot be added to a read-only store."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
def addN(self, quads: Iterable[_QuadType]) -> None:  # noqa: N802
    """Always raise ``TypeError``: quads cannot be added to a read-only store."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
# type error: Signature of "remove" incompatible with supertype "Store"
|
||||
def remove(  # type: ignore[override]
    self, _: _TriplePatternType, context: Optional[_ContextType]
) -> None:
    """Always raise ``TypeError``: triples cannot be removed from a read-only store."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
# type error: Signature of "update" incompatible with supertype "SPARQLConnector"
|
||||
def update(  # type: ignore[override]
    self,
    query: Union[Update, str],
    initNs: Dict[str, Any] = {},  # noqa: N803
    initBindings: Dict[str, Identifier] = {},  # noqa: N803
    queryGraph: Identifier = None,  # noqa: N803
    DEBUG: bool = False,  # noqa: N803
) -> None:
    """Always raise ``TypeError``: updates are not possible on a read-only store.

    None of the arguments is inspected (the dict defaults are never
    touched).
    """
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
def _query(self, *args: Any, **kwargs: Any) -> Result:
    """Count the request, then forward it unchanged to the parent ``query``."""
    self._queries = self._queries + 1
    parent = super(SPARQLStore, self)
    return parent.query(*args, **kwargs)
|
||||
|
||||
def _inject_prefixes(self, query: str, extra_bindings: Mapping[str, Any]) -> str:
    """Prepend ``PREFIX`` declarations to ``query``.

    The declarations come from the store's ``nsBindings`` merged with
    ``extra_bindings``. When both define the same prefix, the entry from
    ``extra_bindings`` wins, so each prefix is declared exactly once.
    Declarations are emitted in insertion order (store bindings first).

    :param query: the SPARQL text to extend.
    :param extra_bindings: additional prefix -> namespace pairs.
    :return: ``query`` unchanged if there is nothing to declare, otherwise
        the declarations, an empty line, and then ``query``.
    """
    # A dict merge (rather than a set of items) keeps the output
    # deterministic and prevents declaring one prefix twice.
    bindings = {**self.nsBindings, **extra_bindings}
    if not bindings:
        return query
    prologue = "\n".join(
        "PREFIX %s: <%s>" % (prefix, namespace)
        for prefix, namespace in bindings.items()
    )
    # The empty element separates the prologue from the query with a blank line.
    return "\n".join([prologue, "", query])
|
||||
|
||||
# type error: Signature of "query" incompatible with supertype "SPARQLConnector"
|
||||
# type error: Signature of "query" incompatible with supertype "Store"
|
||||
def query(  # type: ignore[override]
    self,
    query: Union[Query, str],
    initNs: Optional[Mapping[str, Any]] = None,  # noqa: N803
    initBindings: Optional[Mapping[str, Identifier]] = None,  # noqa: N803
    queryGraph: Optional[str] = None,  # noqa: N803
    DEBUG: bool = False,  # noqa: N803
) -> Result:
    """Send a SPARQL query string to the endpoint via ``_query``.

    :param query: the query; asserted to be a ``str``.
    :param initNs: prefix bindings injected as ``PREFIX`` lines when non-empty.
    :param initBindings: variable bindings appended as a ``VALUES`` block.
    :param queryGraph: passed as ``default_graph`` when ``_is_contextual``
        accepts it, otherwise ``None`` is passed.
    :param DEBUG: stored on ``self.debug``.
    :raises Exception: if ``initBindings`` is given but ``sparql11`` is false.
    """
    self.debug = DEBUG
    assert isinstance(query, str)

    has_prefixes = initNs is not None and len(initNs) > 0
    if has_prefixes:
        query = self._inject_prefixes(query, initNs)

    if initBindings:
        if not self.sparql11:
            raise Exception("initBindings not supported for SPARQL 1.0 Endpoints.")
        names = list(initBindings)
        variables = " ".join("?" + str(name) for name in names)
        values = " ".join(self.node_to_sparql(initBindings[name]) for name in names)
        # VALUES was added to SPARQL 1.1 on 2012/07/24
        query += "\nVALUES ( %s )\n{ ( %s ) }\n" % (variables, values)

    default_graph = queryGraph if self._is_contextual(queryGraph) else None
    return self._query(query, default_graph=default_graph)
|
||||
|
||||
# type error: Return type "Iterator[Tuple[Tuple[Node, Node, Node], None]]" of "triples" incompatible with return type "Iterator[Tuple[Tuple[Node, Node, Node], Iterator[Optional[Graph]]]]"
|
||||
def triples(  # type: ignore[override]
    self, spo: _TriplePatternType, context: Optional[_ContextType] = None
) -> Iterator[Tuple[_TripleType, None]]:
    """Yield ``(triple, None)`` pairs for endpoint triples matching ``spo``.

    :param spo: the ``(s, p, o)`` pattern used as filter for the SPARQL
        query. A ``None`` term is a wildcard; ``(None, None, None)`` means
        anything.
    :param context: the graph effectively calling this method.
    :raises ValueError: if the result is a tuple whose first item is 401.

    With at least one wildcard this runs a ``SELECT`` over the wildcard
    positions, essentially ``SELECT ?s ?p ?o { ?s ?p ?o }``. A fully bound
    pattern runs an ``ASK`` and yields the pattern itself when the answer
    is true.

    ``context`` may carry three optional attributes, named by the module
    constants ``LIMIT``, ``OFFSET`` and ``ORDERBY``, to refine the query:

    * LIMIT: an integer to limit the number of results
    * OFFSET: an integer to enable paging of results
    * ORDERBY: a ``Variable`` (``Variable('s')``, ``Variable('p')`` or
      ``Variable('o')``) to sort by; by default the first variable of the
      pattern is used.

    .. warning::

        - Setting LIMIT or OFFSET automatically adds an ORDER BY clause
          (when there is a variable to sort by), because otherwise results
          are retrieved in a non-deterministic order and paging would be
          meaningless.
        - Using OFFSET without LIMIT skips the leading results and returns
          all the remaining ones.

    .. code-block:: python

        a_graph.LIMIT = limit
        a_graph.OFFSET = offset
        for triple in a_graph.triples(mytriple):
            ...  # do something
        # Remove LIMIT and OFFSET if not required for the next triples() calls
        del a_graph.LIMIT
        del a_graph.OFFSET
    """
    s, p, o = spo

    # Only ``None`` is a wildcard. A truthiness test would also replace
    # falsy bound terms (e.g. an empty literal) with a variable and thereby
    # silently widen the query.
    vars = []
    if s is None:
        s = Variable("s")
        vars.append(s)
    if p is None:
        p = Variable("p")
        vars.append(p)
    if o is None:
        o = Variable("o")
        vars.append(o)

    if vars:
        verb = "SELECT %s " % " ".join(term.n3() for term in vars)
    else:
        verb = "ASK"

    nts = self.node_to_sparql
    query = "%s { %s %s %s }" % (verb, nts(s), nts(p), nts(o))

    # Paging is only meaningful on ordered results, so any of the three
    # attributes triggers an ORDER BY clause.
    if (
        hasattr(context, LIMIT)
        or hasattr(context, OFFSET)
        or hasattr(context, ORDERBY)
    ):
        # An explicitly requested sort variable wins; otherwise fall back to
        # the first variable appearing in the pattern.
        order_var = getattr(context, ORDERBY, None)
        if not isinstance(order_var, Variable):
            order_var = next(
                (term for term in (s, p, o) if isinstance(term, Variable)), None
            )
        # A fully bound pattern without explicit ORDERBY has nothing to sort by.
        if order_var is not None:
            query = query + " %s %s" % (ORDERBY, order_var.n3())

        for attribute, clause in ((LIMIT, " LIMIT %s"), (OFFSET, " OFFSET %s")):
            try:
                query = query + clause % int(getattr(context, attribute))
            except (ValueError, TypeError, AttributeError):
                # Attribute missing or not convertible to int: omit the clause.
                pass

    result = self._query(
        query,
        # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
        default_graph=context.identifier if self._is_contextual(context) else None,  # type: ignore[union-attr]
    )

    if vars:
        if type(result) is tuple:
            if result[0] == 401:
                raise ValueError(
                    "It looks like you need to authenticate with this SPARQL Store. HTTP unauthorized"
                )
        for row in result:
            if TYPE_CHECKING:
                # This will be a ResultRow because if vars is truthish then
                # the query will be a SELECT query.
                assert isinstance(row, ResultRow)
            # Bound terms are not keys of the row, so get() falls back to
            # the term itself for those positions.
            yield (
                # type error: No overload variant of "get" of "ResultRow" matches argument types "Node", "Node"
                row.get(s, s),  # type: ignore[call-overload]
                row.get(p, p),  # type: ignore[call-overload]
                row.get(o, o),  # type: ignore[call-overload]
            ), None  # why is the context here not the passed in graph 'context'?
    else:
        if result.askAnswer:
            yield (s, p, o), None
|
||||
|
||||
def triples_choices(
    self,
    _: Tuple[
        Union[_SubjectType, List[_SubjectType]],
        Union[_PredicateType, List[_PredicateType]],
        Union[_ObjectType, List[_ObjectType]],
    ],
    context: Optional[_ContextType] = None,
) -> Generator[
    Tuple[
        Tuple[_SubjectType, _PredicateType, _ObjectType],
        Iterator[Optional[_ContextType]],
    ],
    None,
    None,
]:
    """Variant of ``triples`` accepting a list of terms in any slot.

    Not available on this store: calling it always raises
    ``NotImplementedError``.
    """
    message = "Triples choices currently not supported"
    raise NotImplementedError(message)
|
||||
|
||||
def __len__(self, context: Optional[_ContextType] = None) -> int:
    """Return the triple count reported by the endpoint.

    Runs ``SELECT (count(*) as ?c) WHERE {?s ?p ?o .}`` and converts the
    ``c`` value of the first result row to ``int``.

    :param context: graph to count in; its identifier is passed as
        ``default_graph`` when ``_is_contextual`` accepts it.
    :raises NotImplementedError: if ``sparql11`` is false.
    """
    if not self.sparql11:
        raise NotImplementedError(
            "For performance reasons, this is not supported for sparql1.0 endpoints"
        )

    q = "SELECT (count(*) as ?c) WHERE {?s ?p ?o .}"
    result = self._query(
        q,
        # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
        default_graph=(
            context.identifier  # type: ignore[union-attr]
            if self._is_contextual(context)
            else None
        ),
    )
    # type error: Item "Tuple[Node, ...]" of "Union[Tuple[Node, Node, Node], bool, ResultRow]" has no attribute "c"
    return int(next(iter(result)).c)  # type: ignore[union-attr]
|
||||
|
||||
# type error: Return type "Generator[Identifier, None, None]" of "contexts" incompatible with return type "Generator[Graph, None, None]" in supertype "Store"
|
||||
def contexts(  # type: ignore[override]
    self, triple: Optional[_TripleType] = None
) -> Generator[_ContextIdentifierType, None, None]:
    """Iterate over the graph names returned by the endpoint.

    Runs ``SELECT ?name WHERE { GRAPH ?name { s p o }}`` for the given
    triple (falsy terms become the variables ``?s``/``?p``/``?o``), or
    ``SELECT ?name WHERE { GRAPH ?name {} }`` when ``triple`` is falsy, and
    returns a generator over the ``name`` value of every result row. The
    query itself is issued when this method is called.

    Please note that some SPARQL endpoints are not able to find empty named
    graphs.
    """
    if not triple:
        q = "SELECT ?name WHERE { GRAPH ?name {} }"
    else:
        render = self.node_to_sparql
        s, p, o = triple
        params = tuple(
            render(term if term else Variable(label))
            for term, label in zip((s, p, o), "spo")
        )
        q = "SELECT ?name WHERE { GRAPH ?name { %s %s %s }}" % params

    rows = self._query(q)
    # type error: Item "bool" of "Union[Tuple[Node, Node, Node], bool, ResultRow]" has no attribute "name"
    # error: Generator has incompatible item type "Union[Any, Identifier]"; expected "IdentifiedNode"
    return (row.name for row in rows)  # type: ignore[union-attr,misc]
|
||||
|
||||
# Namespace persistence interface implementation
|
||||
def bind(self, prefix: str, namespace: URIRef, override: bool = True) -> None:
    """Bind ``prefix`` to ``namespace`` in the store's ``nsBindings``.

    :param prefix: the prefix to bind; the empty string is a valid prefix.
    :param namespace: the namespace the prefix stands for.
    :param override: when true, a prefix that is already bound to
        ``namespace`` is removed first so that only the new prefix remains.
    """
    bound_prefix = self.prefix(namespace)
    # Compare against None: the empty prefix is falsy but still has to be
    # removed when the namespace is rebound.
    if override and bound_prefix is not None:
        del self.nsBindings[bound_prefix]
    self.nsBindings[prefix] = namespace
|
||||
|
||||
def prefix(self, namespace: URIRef) -> Optional[str]:
    """Return a prefix bound to ``namespace``, or ``None`` if there is none.

    If several prefixes map to the same namespace, the one that comes last
    in ``nsBindings`` is returned.
    """
    by_namespace = {bound_ns: pfx for pfx, bound_ns in self.nsBindings.items()}
    return by_namespace.get(namespace)
|
||||
|
||||
def namespace(self, prefix: str) -> Optional[URIRef]:
    """Return the namespace bound to ``prefix``, or ``None`` if it is unbound."""
    bindings = self.nsBindings
    return bindings.get(prefix)
|
||||
|
||||
def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
    """Yield every ``(prefix, namespace)`` pair held in ``nsBindings``."""
    yield from self.nsBindings.items()
|
||||
|
||||
def add_graph(self, graph: Graph) -> None:
    """Always raise ``TypeError``: graphs cannot be added to a read-only store."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
def remove_graph(self, graph: Graph) -> None:
    """Always raise ``TypeError``: graphs cannot be removed from a read-only store."""
    message = "The SPARQL store is read only"
    raise TypeError(message)
|
||||
|
||||
@overload
def _is_contextual(self, graph: None) -> te.Literal[False]: ...

@overload
def _is_contextual(self, graph: Optional[Union[Graph, str]]) -> bool: ...

def _is_contextual(self, graph: Optional[Union[Graph, str]]) -> bool:
    """Tell whether a request for ``graph`` must target a specific graph.

    Returns ``False`` when the store is not context aware or ``graph`` is
    ``None``. A string is contextual unless it equals ``"__UNION__"``; any
    other object is contextual unless its ``identifier`` equals
    ``DATASET_DEFAULT_GRAPH_ID``.
    """
    if self.context_aware and graph is not None:
        if isinstance(graph, str):
            return graph != "__UNION__"
        return graph.identifier != DATASET_DEFAULT_GRAPH_ID
    return False
|
||||
|
||||
def subjects(
    self,
    predicate: Optional[_PredicateType] = None,
    object: Optional[_ObjectType] = None,
) -> Generator[_SubjectType, None, None]:
    """Yield the subject of every triple matching ``predicate`` and ``object``."""
    pattern = (None, predicate, object)
    for matched, _ in self.triples(pattern):
        yield matched[0]
|
||||
|
||||
def predicates(
    self,
    subject: Optional[_SubjectType] = None,
    object: Optional[_ObjectType] = None,
) -> Generator[_PredicateType, None, None]:
    """Yield the predicate of every triple matching ``subject`` and ``object``."""
    pattern = (subject, None, object)
    for matched, _ in self.triples(pattern):
        yield matched[1]
|
||||
|
||||
def objects(
    self,
    subject: Optional[_SubjectType] = None,
    predicate: Optional[_PredicateType] = None,
) -> Generator[_ObjectType, None, None]:
    """Yield the object of every triple matching ``subject`` and ``predicate``."""
    pattern = (subject, predicate, None)
    for matched, _ in self.triples(pattern):
        yield matched[2]
|
||||
|
||||
def subject_predicates(
    self, object: Optional[_ObjectType] = None
) -> Generator[Tuple[_SubjectType, _PredicateType], None, None]:
    """Yield ``(subject, predicate)`` for every triple matching ``object``."""
    pattern = (None, None, object)
    for matched, _ in self.triples(pattern):
        yield matched[0], matched[1]
|
||||
|
||||
def subject_objects(
    self, predicate: Optional[_PredicateType] = None
) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
    """Yield ``(subject, object)`` for every triple matching ``predicate``."""
    pattern = (None, predicate, None)
    for matched, _ in self.triples(pattern):
        yield matched[0], matched[2]
|
||||
|
||||
def predicate_objects(
    self, subject: Optional[_SubjectType] = None
) -> Generator[Tuple[_PredicateType, _ObjectType], None, None]:
    """Yield ``(predicate, object)`` for every triple matching ``subject``."""
    pattern = (subject, None, None)
    for matched, _ in self.triples(pattern):
        yield matched[1], matched[2]
|
||||
|
||||
|
||||
class SPARQLUpdateStore(SPARQLStore):
|
||||
"""A store using SPARQL queries for reading and SPARQL Update for changes.
|
||||
|
||||
This can be context-aware, if so, any changes will be to the given named
|
||||
graph only.
|
||||
|
||||
In favor of the SPARQL 1.1 motivated Dataset, we advise against using this
|
||||
with ConjunctiveGraphs, as it reads and writes from and to the
|
||||
"default graph". Exactly what this means depends on the endpoint and can
|
||||
result in confusion.
|
||||
|
||||
For Graph objects, everything works as expected.
|
||||
|
||||
See the :class:`SPARQLStore` base class for more information.
|
||||
|
||||
"""
|
||||
|
||||
where_pattern = re.compile(r"""(?P<where>WHERE\s*\{)""", re.IGNORECASE)
|
||||
|
||||
##############################################################
|
||||
# Regex for injecting GRAPH blocks into updates on a context #
|
||||
##############################################################
|
||||
|
||||
# Observations on the SPARQL grammar (http://www.w3.org/TR/2013/REC-sparql11-query-20130321/):
|
||||
# 1. Only the terminals STRING_LITERAL1, STRING_LITERAL2,
|
||||
# STRING_LITERAL_LONG1, STRING_LITERAL_LONG2, and comments can contain
|
||||
# curly braces.
|
||||
# 2. The non-terminals introduce curly braces in pairs only.
|
||||
# 3. Unescaped " can occur only in strings and comments.
|
||||
# 3. Unescaped ' can occur only in strings, comments, and IRIRefs.
|
||||
# 4. \ always escapes the following character, especially \", \', and
|
||||
# \\ denote literal ", ', and \ respectively.
|
||||
# 5. # always starts a comment outside of string and IRI
|
||||
# 6. A comment ends at the next newline
|
||||
# 7. IRIREFs need to be detected, as they may contain # without starting a comment
|
||||
# 8. PrefixedNames do not contain a #
|
||||
# As a consequence, it should be rather easy to detect strings and comments
|
||||
# in order to avoid unbalanced curly braces.
|
||||
|
||||
# From the SPARQL grammar
|
||||
STRING_LITERAL1 = "'([^'\\\\]|\\\\.)*'"
|
||||
STRING_LITERAL2 = '"([^"\\\\]|\\\\.)*"'
|
||||
STRING_LITERAL_LONG1 = "'''(('|'')?([^'\\\\]|\\\\.))*'''"
|
||||
STRING_LITERAL_LONG2 = '"""(("|"")?([^"\\\\]|\\\\.))*"""'
|
||||
String = "(%s)|(%s)|(%s)|(%s)" % (
|
||||
STRING_LITERAL1,
|
||||
STRING_LITERAL2,
|
||||
STRING_LITERAL_LONG1,
|
||||
STRING_LITERAL_LONG2,
|
||||
)
|
||||
IRIREF = '<([^<>"{}|^`\\]\\\\[\\x00-\\x20])*>'
|
||||
COMMENT = "#[^\\x0D\\x0A]*([\\x0D\\x0A]|\\Z)"
|
||||
|
||||
# Simplified grammar to find { at beginning and } at end of blocks
|
||||
BLOCK_START = "{"
|
||||
BLOCK_END = "}"
|
||||
ESCAPED = "\\\\."
|
||||
|
||||
# Match anything that doesn't start or end a block:
|
||||
BlockContent = "(%s)|(%s)|(%s)|(%s)" % (String, IRIREF, COMMENT, ESCAPED)
|
||||
BlockFinding = "(?P<block_start>%s)|(?P<block_end>%s)|(?P<block_content>%s)" % (
|
||||
BLOCK_START,
|
||||
BLOCK_END,
|
||||
BlockContent,
|
||||
)
|
||||
BLOCK_FINDING_PATTERN = re.compile(BlockFinding)
|
||||
|
||||
# Note that BLOCK_FINDING_PATTERN.finditer() will not cover the whole
|
||||
# string with matches. Everything that is not matched will have to be
|
||||
# part of the modified query as is.
|
||||
|
||||
##################################################################
|
||||
|
||||
def __init__(
    self,
    query_endpoint: Optional[str] = None,
    update_endpoint: Optional[str] = None,
    sparql11: bool = True,
    context_aware: bool = True,
    postAsEncoded: bool = True,  # noqa: N803
    autocommit: bool = True,
    dirty_reads: bool = False,
    **kwds,
):
    """Initialise the read part via ``SPARQLStore`` and the write-side state.

    :param autocommit: if set, the store commits after every writing
        operation. If false, edits are collected and only sent to the
        server once ``commit`` is called.

    :param dirty_reads: if set, pending edits are not committed before
        reading, so what was written is not visible to reads until
        ``commit`` is called manually.
    """
    SPARQLStore.__init__(
        self,
        query_endpoint,
        sparql11,
        context_aware,
        update_endpoint=update_endpoint,
        **kwds,
    )

    self.autocommit = autocommit
    self.dirty_reads = dirty_reads
    self.postAsEncoded = postAsEncoded
    # Pending update strings; None while no transaction is open.
    self._edits: Optional[List[str]] = None
    # Number of update requests sent through _update().
    self._updates = 0
|
||||
|
||||
def query(self, *args: Any, **kwargs: Any) -> Result:
    """Run a read query, committing pending edits first when required.

    Pending edits are committed unless ``autocommit`` or ``dirty_reads`` is
    set; the call is then delegated to ``SPARQLStore.query``.
    """
    if not (self.autocommit or self.dirty_reads):
        self.commit()
    return SPARQLStore.query(self, *args, **kwargs)
|
||||
|
||||
# type error: Signature of "triples" incompatible with supertype "Store"
|
||||
def triples(  # type: ignore[override]
    self, *args: Any, **kwargs: Any
) -> Iterator[Tuple[_TripleType, None]]:
    """Return ``SPARQLStore.triples(...)``, committing pending edits first when required.

    Pending edits are committed unless ``autocommit`` or ``dirty_reads`` is
    set.
    """
    if not (self.autocommit or self.dirty_reads):
        self.commit()
    return SPARQLStore.triples(self, *args, **kwargs)
|
||||
|
||||
# type error: Signature of "contexts" incompatible with supertype "Store"
|
||||
def contexts(  # type: ignore[override]
    self, *args: Any, **kwargs: Any
) -> Generator[_ContextIdentifierType, None, None]:
    """Return ``SPARQLStore.contexts(...)``, committing pending edits first when required.

    Pending edits are committed unless ``autocommit`` or ``dirty_reads`` is
    set.
    """
    if not (self.autocommit or self.dirty_reads):
        self.commit()
    return SPARQLStore.contexts(self, *args, **kwargs)
|
||||
|
||||
def __len__(self, *args: Any, **kwargs: Any) -> int:
    """Return ``SPARQLStore.__len__(...)``, committing pending edits first when required.

    Pending edits are committed unless ``autocommit`` or ``dirty_reads`` is
    set.
    """
    if not (self.autocommit or self.dirty_reads):
        self.commit()
    return SPARQLStore.__len__(self, *args, **kwargs)
|
||||
|
||||
def open(
    self, configuration: Union[str, Tuple[str, str]], create: bool = False
) -> None:
    """Set the endpoint URLs for this store.

    :param configuration: either a tuple of (query_endpoint, update_endpoint),
        or a single value that is used as both query and update endpoint.
        For a tuple with only one item, just the query endpoint is set.
    :param create: if true, an exception is raised.
    """
    if create:
        raise Exception("Cannot create a SPARQL Endpoint")

    if not isinstance(configuration, tuple):
        self.query_endpoint = configuration
        self.update_endpoint = configuration
        return

    self.query_endpoint = configuration[0]
    if len(configuration) > 1:
        self.update_endpoint = configuration[1]
|
||||
|
||||
def _transaction(self) -> List[str]:
    """Return the list of pending edits, creating it on first use."""
    edits = self._edits
    if edits is None:
        edits = self._edits = []
    return edits
|
||||
|
||||
# Transactional interfaces
|
||||
def commit(self) -> None:
    """Send all pending edits to the endpoint as one update request.

    add(), addN(), and remove() are transactional to reduce the overhead of
    many small edits. Read and update() calls will automatically commit any
    outstanding edits. This should behave as expected most of the time,
    except that alternating writes and reads can degenerate to one request
    per triple.

    The pending edits are joined with ``"\\n;\\n"``, passed to ``_update``
    and then discarded. Nothing happens when there are no pending edits.
    """
    pending = self._edits
    if not pending:
        return
    self._update("\n;\n".join(pending))
    self._edits = None
|
||||
|
||||
def rollback(self) -> None:
    """Discard all pending edits without sending them."""
    setattr(self, "_edits", None)
|
||||
|
||||
def add(
    self,
    spo: _TripleType,
    context: Optional[_ContextType] = None,
    quoted: bool = False,
) -> None:
    """Queue an ``INSERT DATA`` for one triple and commit if ``autocommit`` is set.

    The triple is wrapped in a ``GRAPH`` block for ``context`` when
    ``_is_contextual`` accepts it.

    :raises Exception: if no update endpoint is set.
    """
    if not self.update_endpoint:
        raise Exception("UpdateEndpoint is not set")

    assert not quoted
    subject, predicate, obj = spo

    render = self.node_to_sparql
    triple = "%s %s %s ." % (render(subject), render(predicate), render(obj))

    if not self._is_contextual(context):
        q = "INSERT DATA { %s }" % triple
    else:
        if TYPE_CHECKING:
            # _is_contextual will never return true if context is None
            assert context is not None
        q = "INSERT DATA { GRAPH %s { %s } }" % (render(context.identifier), triple)

    self._transaction().append(q)
    if self.autocommit:
        self.commit()
|
||||
|
||||
def addN(self, quads: Iterable[_QuadType]) -> None:  # noqa: N802
    """Queue ``INSERT DATA`` operations for a collection of quads.

    Quads are grouped by context and one ``INSERT DATA`` is queued per
    context. As in ``add``, the triples are wrapped in a ``GRAPH`` block
    only when ``_is_contextual`` accepts the context; otherwise they are
    inserted without a ``GRAPH`` block. Commits afterwards if
    ``autocommit`` is set.

    :param quads: iterable of ``(subject, predicate, object, context)``.
    :raises Exception: if no update endpoint is set.
    """
    if not self.update_endpoint:
        raise Exception("UpdateEndpoint is not set - call 'open'")

    contexts = collections.defaultdict(list)
    for subject, predicate, obj, context in quads:
        contexts[context].append((subject, predicate, obj))

    nts = self.node_to_sparql
    data: List[str] = []
    for context, context_triples in contexts.items():
        body = "\n".join(
            "%s %s %s ." % (nts(subject), nts(predicate), nts(obj))
            for subject, predicate, obj in context_triples
        )
        if self._is_contextual(context):
            data.append(
                "INSERT DATA { GRAPH %s { %s } }\n" % (nts(context.identifier), body)
            )
        else:
            # Same rule as add(): non-contextual writes carry no GRAPH block.
            data.append("INSERT DATA { %s }\n" % body)

    self._transaction().extend(data)
    if self.autocommit:
        self.commit()
|
||||
|
||||
# type error: Signature of "remove" incompatible with supertype "Store"
|
||||
def remove(  # type: ignore[override]
    self, spo: _TriplePatternType, context: Optional[_ContextType]
) -> None:
    """Queue a ``DELETE ... WHERE`` for the pattern and commit if ``autocommit`` is set.

    :param spo: the ``(subject, predicate, object)`` pattern; a ``None``
        term is a wildcard and is replaced by a SPARQL variable.
    :param context: graph to delete from; a ``WITH <graph>`` clause is used
        when ``_is_contextual`` accepts it.
    :raises Exception: if no update endpoint is set.
    """
    if not self.update_endpoint:
        raise Exception("UpdateEndpoint is not set - call 'open'")

    (subject, predicate, obj) = spo
    # Only ``None`` is a wildcard. A truthiness test would also turn falsy
    # bound terms (e.g. an empty literal) into variables, making the DELETE
    # match far more triples than the caller asked for.
    if subject is None:
        subject = Variable("S")
    if predicate is None:
        predicate = Variable("P")
    if obj is None:
        obj = Variable("O")

    nts = self.node_to_sparql
    triple = "%s %s %s ." % (nts(subject), nts(predicate), nts(obj))
    if self._is_contextual(context):
        if TYPE_CHECKING:
            # _is_contextual will never return true if context is None
            assert context is not None
        cid = nts(context.identifier)
        q = "WITH %(graph)s DELETE { %(triple)s } WHERE { %(triple)s }" % {
            "graph": cid,
            "triple": triple,
        }
    else:
        q = "DELETE { %s } WHERE { %s } " % (triple, triple)
    self._transaction().append(q)
    if self.autocommit:
        self.commit()
|
||||
|
||||
def setTimeout(self, timeout) -> None:  # noqa: N802
    """Store ``timeout``, converted with ``int()``, on ``self._timeout``."""
    seconds = int(timeout)
    self._timeout = seconds
|
||||
|
||||
def _update(self, update):
    """Count the request and send ``update`` through ``SPARQLConnector.update``."""
    self._updates = self._updates + 1
    SPARQLConnector.update(self, update)
|
||||
|
||||
# type error: Signature of "update" incompatible with supertype "SPARQLConnector"
|
||||
# type error: Signature of "update" incompatible with supertype "Store"
|
||||
def update(  # type: ignore[override]
    self,
    query: Union[Update, str],
    initNs: Dict[str, Any] = {},  # noqa: N803
    initBindings: Dict[str, Identifier] = {},  # noqa: N803
    queryGraph: Optional[str] = None,  # noqa: N803
    DEBUG: bool = False,  # noqa: N803
):
    """
    Perform a SPARQL Update Query against the endpoint,
    INSERT, LOAD, DELETE etc.
    Setting initNs adds PREFIX declarations to the beginning of
    the update. Setting initBindings adds inline VALUEs to the
    beginning of every WHERE clause. By the SPARQL grammar, all
    operations that support variables (namely INSERT and DELETE)
    require a WHERE clause.
    Important: initBindings fails if the update contains the
    substring 'WHERE {' which does not denote a WHERE clause, e.g.
    if it is part of a literal.

    The update is appended to the pending edits and committed right away
    when ``autocommit`` is set. The dict defaults of ``initNs`` and
    ``initBindings`` are only read, never modified.

    :raises Exception: if no update endpoint is set.

    .. admonition:: Context-aware query rewriting

        - **When:** If context-awareness is enabled and the graph is not the default graph of the store.
        - **Why:** To ensure consistency with the :class:`~rdflib.plugins.stores.memory.Memory` store.
          The graph must accept "local" SPARQL requests (requests with no GRAPH keyword)
          as if it was the default graph.
        - **What is done:** These "local" queries are rewritten by this store.
          The content of each block of a SPARQL Update operation is wrapped in a GRAPH block
          except if the block is empty.
          This basically causes INSERT, INSERT DATA, DELETE, DELETE DATA and WHERE to operate
          only on the context.
        - **Example:** ``"INSERT DATA { <urn:michel> <urn:likes> <urn:pizza> }"`` is converted into
          ``"INSERT DATA { GRAPH <urn:graph> { <urn:michel> <urn:likes> <urn:pizza> } }"``.
        - **Warning:** Queries are presumed to be "local" but this assumption is **not checked**.
          For instance, if the query already contains GRAPH blocks, the latter will be wrapped in new GRAPH blocks.
        - **Warning:** A simplified grammar is used that should tolerate
          extensions of the SPARQL grammar. Still, the process may fail in
          uncommon situations and produce invalid output.

    """
    if not self.update_endpoint:
        raise Exception("Update endpoint is not set!")

    self.debug = DEBUG
    assert isinstance(query, str)
    query = self._inject_prefixes(query, initNs)

    if self._is_contextual(queryGraph):
        if TYPE_CHECKING:
            # _is_contextual will never return true if context is None
            assert queryGraph is not None
        query = self._insert_named_graph(query, queryGraph)

    if initBindings:
        # For INSERT and DELETE the WHERE clause is obligatory
        # (http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rModify)
        # Other query types do not allow variables and don't
        # have a WHERE clause. This also works for updates with
        # more than one INSERT/DELETE.
        v = list(initBindings)
        values = "\nVALUES ( %s )\n{ ( %s ) }\n" % (
            " ".join("?" + str(x) for x in v),
            " ".join(self.node_to_sparql(initBindings[x]) for x in v),
        )
        replacement = "WHERE { " + values
        # Use a callable replacement: a replacement *string* is treated as a
        # template by re.sub, so backslashes inside the serialized terms
        # (e.g. "\n" in a literal) would be reinterpreted or rejected.
        query = self.where_pattern.sub(lambda _match: replacement, query)

    self._transaction().append(query)
    if self.autocommit:
        self.commit()
|
||||
|
||||
def _insert_named_graph(self, query: str, query_graph: str) -> str:
    """
    Insert ``GRAPH <query_graph> { ... }`` into the top-level blocks of a
    SPARQL Update operation.

    For instance, "INSERT DATA { <urn:michel> <urn:likes> <urn:pizza> }"
    is converted into
    "INSERT DATA { GRAPH <urn:graph> { <urn:michel> <urn:likes> <urn:pizza> } }"

    :param query: text of the SPARQL Update request to rewrite.
    :param query_graph: identifier of the graph to wrap the blocks in. A
        ``Node`` is rendered with ``self.node_to_sparql``; any other value
        is formatted as ``<value>``.
    :returns: the rewritten query. Top-level blocks that are empty or hold
        only whitespace are left unwrapped.
    """
    if isinstance(query_graph, Node):
        query_graph = self.node_to_sparql(query_graph)
    else:
        query_graph = "<%s>" % query_graph
    graph_block_open = " GRAPH %s {" % query_graph
    graph_block_close = "} "

    # SPARQL Update supports the following operations:
    # LOAD, CLEAR, DROP, ADD, MOVE, COPY, CREATE, INSERT DATA, DELETE DATA,
    # DELETE/INSERT, DELETE WHERE
    # LOAD, CLEAR, DROP, ADD, MOVE, COPY, CREATE do not make much sense in a
    # context.
    # INSERT DATA, DELETE DATA, and DELETE WHERE require the contents of
    # their block to be wrapped in a GRAPH <?> { }.
    # DELETE/INSERT supports the WITH keyword, which sets the graph to be
    # used for all following DELETE/INSERT instruction including the
    # non-optional WHERE block. Equivalently, a GRAPH block can be added to
    # all blocks.
    #
    # Strategy employed here: Wrap the contents of every top-level block
    # into a `GRAPH <?> { }`.

    level = 0  # current brace nesting depth
    modified_query = []  # output fragments, joined at the end
    pos = 0  # offset in `query` up to which text has been emitted
    for match in self.BLOCK_FINDING_PATTERN.finditer(query):
        if match.group("block_start") is not None:
            level += 1
            if level == 1:
                # Emit everything up to and including the opening brace,
                # then open the GRAPH block right behind it.
                modified_query.append(query[pos : match.end()])
                modified_query.append(graph_block_open)
                pos = match.end()
        elif match.group("block_end") is not None:
            if level == 1:
                since_previous_pos = query[pos : match.start()]
                if (
                    modified_query[-1] is graph_block_open
                    and not since_previous_pos.strip()
                ):
                    # In this case, adding graph_block_open and
                    # graph_block_close results in an empty GRAPH block.
                    # Some endpoints (e.g. TDB) can not handle this.
                    # Therefore remove the previously added block opener.
                    modified_query.pop()
                    modified_query.append(since_previous_pos)
                else:
                    modified_query.append(since_previous_pos)
                    modified_query.append(graph_block_close)
                # The closing brace itself is emitted with the next slice.
                pos = match.start()
            level -= 1
    modified_query.append(query[pos:])

    return "".join(modified_query)
|
||||
|
||||
def add_graph(self, graph: Graph) -> None:
    """
    Register ``graph`` with the store.

    When the store is not graph aware the call is delegated to
    ``Store.add_graph``. Otherwise a ``CREATE GRAPH`` update is issued for
    the graph's identifier, except for the dataset default graph, for
    which nothing is sent.

    :param graph: the graph to add; only ``graph.identifier`` is read here.
    """
    if not self.graph_aware:
        Store.add_graph(self, graph)
    elif graph.identifier != DATASET_DEFAULT_GRAPH_ID:
        self.update("CREATE GRAPH %s" % self.node_to_sparql(graph.identifier))
|
||||
|
||||
def remove_graph(self, graph: Graph) -> None:
    """
    Remove ``graph`` from the store.

    When the store is not graph aware the call is delegated to
    ``Store.remove_graph``. Otherwise a ``DROP DEFAULT`` update is issued
    for the dataset default graph and ``DROP GRAPH <identifier>`` for any
    other graph.

    :param graph: the graph to remove; only ``graph.identifier`` is read
        here.
    """
    if not self.graph_aware:
        Store.remove_graph(self, graph)
    elif graph.identifier == DATASET_DEFAULT_GRAPH_ID:
        self.update("DROP DEFAULT")
    else:
        self.update("DROP GRAPH %s" % self.node_to_sparql(graph.identifier))
|
||||
|
||||
def subjects(
    self,
    predicate: Optional[_PredicateType] = None,
    object: Optional[_ObjectType] = None,
) -> Generator[_SubjectType, None, None]:
    """A generator of subjects with the given predicate and object

    ``None`` for ``predicate`` or ``object`` is passed through to
    ``self.triples`` unchanged. Each item produced by ``self.triples`` is
    unpacked as a pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((None, predicate, object)):
        yield triple[0]
|
||||
|
||||
def predicates(
    self,
    subject: Optional[_SubjectType] = None,
    object: Optional[_ObjectType] = None,
) -> Generator[_PredicateType, None, None]:
    """A generator of predicates with the given subject and object

    ``None`` for ``subject`` or ``object`` is passed through to
    ``self.triples`` unchanged. Each item produced by ``self.triples`` is
    unpacked as a pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((subject, None, object)):
        yield triple[1]
|
||||
|
||||
def objects(
    self,
    subject: Optional[_SubjectType] = None,
    predicate: Optional[_PredicateType] = None,
) -> Generator[_ObjectType, None, None]:
    """A generator of objects with the given subject and predicate

    ``None`` for ``subject`` or ``predicate`` is passed through to
    ``self.triples`` unchanged. Each item produced by ``self.triples`` is
    unpacked as a pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((subject, predicate, None)):
        yield triple[2]
|
||||
|
||||
def subject_predicates(
    self, object: Optional[_ObjectType] = None
) -> Generator[Tuple[_SubjectType, _PredicateType], None, None]:
    """A generator of (subject, predicate) tuples for the given object

    ``None`` for ``object`` is passed through to ``self.triples``
    unchanged. Each item produced by ``self.triples`` is unpacked as a
    pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((None, None, object)):
        yield triple[0], triple[1]
|
||||
|
||||
def subject_objects(
    self, predicate: Optional[_PredicateType] = None
) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]:
    """A generator of (subject, object) tuples for the given predicate

    ``None`` for ``predicate`` is passed through to ``self.triples``
    unchanged. Each item produced by ``self.triples`` is unpacked as a
    pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((None, predicate, None)):
        yield triple[0], triple[2]
|
||||
|
||||
def predicate_objects(
    self, subject: Optional[_SubjectType] = None
) -> Generator[Tuple[_PredicateType, _ObjectType], None, None]:
    """A generator of (predicate, object) tuples for the given subject

    ``None`` for ``subject`` is passed through to ``self.triples``
    unchanged. Each item produced by ``self.triples`` is unpacked as a
    pair; only the first element (the triple) is used.
    """
    # The second element of each pair is deliberately ignored.
    for triple, _ignored in self.triples((subject, None, None)):
        yield triple[1], triple[2]
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ["SPARQLUpdateStore", "SPARQLStore"]
|
||||
Reference in New Issue
Block a user