2025-12-01
This commit is contained in:
@@ -0,0 +1,207 @@
|
||||
"""
|
||||
HextuplesSerializer RDF graph serializer for RDFLib.
|
||||
See <https://github.com/ontola/hextuples> for details about the format.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from typing import IO, Any, Callable, List, Optional, Type, Union, cast
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Literal, URIRef
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
__all__ = ["HextuplesSerializer"]
|
||||
|
||||
|
||||
class HextuplesSerializer(Serializer):
|
||||
"""
|
||||
Serializes RDF graphs to Hextuples format.
|
||||
"""
|
||||
|
||||
contexts: List[Union[Graph, IdentifiedNode]]
|
||||
dumps: Callable
|
||||
|
||||
def __new__(cls, store: Union[Graph, Dataset, ConjunctiveGraph]):
    """
    Allocate the serializer and bind the constant JSON values on the class.

    When orjson is available the constants are stored as pre-encoded
    ``orjson.Fragment`` objects; otherwise they are plain strings that
    are passed to ``json.dumps``.

    :param store: the graph to serialize (unused here, consumed by
        ``__init__``).
    :return: a new, uninitialised instance of ``cls``.
    """
    if _HAS_ORJSON:
        cls.str_local_id: Union[str, Any] = orjson.Fragment(b'"localId"')
        cls.str_global_id: Union[str, Any] = orjson.Fragment(b'"globalId"')
        cls.empty: Union[str, Any] = orjson.Fragment(b'""')
        cls.lang_str: Union[str, Any] = orjson.Fragment(
            b'"' + RDF.langString.encode("utf-8") + b'"'
        )
        cls.xsd_string: Union[str, Any] = orjson.Fragment(
            b'"' + XSD.string.encode("utf-8") + b'"'
        )
    else:
        cls.str_local_id = "localId"
        cls.str_global_id = "globalId"
        cls.empty = ""
        cls.lang_str = f"{RDF.langString}"
        cls.xsd_string = f"{XSD.string}"
    # Name the class explicitly: ``super(cls, cls)`` is resolved relative to
    # the runtime class, so instantiating a subclass would re-enter this
    # very method and recurse until the stack overflows.
    return super(HextuplesSerializer, cls).__new__(cls)
|
||||
|
||||
def __init__(self, store: Union[Graph, Dataset, ConjunctiveGraph]):
    """
    Record which contexts have to be written for ``store``.

    A plain ``Graph`` is its own single context. For a ``Dataset`` or
    ``ConjunctiveGraph`` every context reported by ``store.contexts()``
    is collected, and a truthy default context is remembered and
    appended to that list as well.

    :param store: the graph, dataset or conjunctive graph to serialize.
    """
    self.default_context: Optional[Union[Graph, IdentifiedNode]]
    self.graph_type: Union[Type[Graph], Type[Dataset], Type[ConjunctiveGraph]]

    if not isinstance(store, (Dataset, ConjunctiveGraph)):
        self.graph_type = Graph
        self.contexts = [store]
        self.default_context = None
    else:
        if isinstance(store, Dataset):
            self.graph_type = Dataset
        else:
            self.graph_type = ConjunctiveGraph
        self.contexts = list(store.contexts())
        if not store.default_context:
            self.default_context = None
        else:
            self.default_context = store.default_context
            self.contexts.append(store.default_context)

    Serializer.__init__(self, store)
|
||||
|
||||
def serialize(
    self,
    stream: IO[bytes],
    base: Optional[str] = None,
    encoding: Optional[str] = "utf-8",
    **kwargs: Any,
) -> None:
    """
    Write every triple of every collected context as one Hextuples line.

    :param stream: binary stream that receives the UTF-8 encoded lines.
    :param base: ignored; a warning is issued when it is given.
    :param encoding: ignored unless it differs from ``utf-8``/``None``,
        in which case a warning is issued; output is always UTF-8.
    :param kwargs: accepted for interface compatibility, unused.
    :raises Exception: if the underlying store is formula-aware.
    """
    if base is not None:
        warnings.warn(
            "base has no meaning for Hextuples serialization. "
            "I will ignore this value"
        )

    if encoding not in [None, "utf-8"]:
        warnings.warn(
            f"Hextuples files are always utf-8 encoded. "
            f"I was passed: {encoding}, "
            "but I'm still going to use utf-8 anyway!"
        )

    if self.store.formula_aware is True:
        raise Exception(
            "Hextuple serialization can't (yet) handle formula-aware stores"
        )

    context: Union[Graph, IdentifiedNode]
    context_str: Union[bytes, str]
    for context in self.contexts:
        # Generate the context string just once per context: it is the same
        # for every triple, so it is built outside the triple loop.
        context_str = cast(
            Union[str, bytes],
            (
                self.empty
                if self.graph_type is Graph
                else (
                    orjson.Fragment('"' + self._context_str(context) + '"')
                    if _HAS_ORJSON
                    else self._context_str(context)
                )
            ),
        )
        for triple in context:
            hl = self._hex_line(triple, context_str)
            if hl is not None:
                # orjson already yields bytes; the json fallback yields str
                stream.write(hl if _HAS_ORJSON else hl.encode())
|
||||
|
||||
def _hex_line(self, triple, context_str: Union[bytes, str]):
    """
    Build the JSON array line for one triple.

    The line holds subject, predicate, value, datatype, language and
    graph, in that order, terminated by a newline.

    :param triple: an indexable ``(subject, predicate, object)``.
    :param context_str: the already prepared graph field.
    :return: ``bytes`` when orjson is used, ``str`` otherwise, or ``None``
        when the subject is not a URIRef/BNode or the object is not a
        URIRef, BNode or Literal (e.g. a QuotedGraph).
    """
    subject = triple[0]
    if not isinstance(subject, (URIRef, BNode)):
        # do not return anything for non-IRIs or BNs, e.g. QuotedGraph, Subjects
        return None

    obj = triple[2]
    if isinstance(obj, Literal):
        value = obj
        if obj.datatype is not None:
            datatype = f"{obj.datatype}"
        elif obj.language is not None:
            datatype = self.lang_str
        else:
            datatype = self.xsd_string
        if obj.language is not None:
            language = f"{obj.language}"
        else:
            language = self.empty
    elif isinstance(obj, URIRef):
        value = self._iri_or_bn(obj)
        datatype = self.str_global_id
        language = self.empty
    elif isinstance(obj, BNode):
        value = self._iri_or_bn(obj)
        datatype = self.str_local_id
        language = self.empty
    else:
        # can't handle non URI, BN or Literal Object (QuotedGraph)
        return None

    fields = [
        self._iri_or_bn(subject),
        triple[1],
        value,
        datatype,
        language,
        context_str,
    ]
    if _HAS_ORJSON:
        return orjson.dumps(fields, option=orjson.OPT_APPEND_NEWLINE)
    return json.dumps(fields) + "\n"
|
||||
|
||||
def _iri_or_bn(self, i_):
    """
    Return the string form of an IRI or blank node.

    :param i_: the term to render.
    :return: the IRI text for a URIRef, the ``n3()`` form for a BNode,
        ``None`` for anything else.
    """
    if isinstance(i_, URIRef):
        return f"{i_}"
    if isinstance(i_, BNode):
        return f"{i_.n3()}"
    return None
|
||||
|
||||
def _context_str(self, context: Union[Graph, IdentifiedNode]) -> str:
    """
    Return the graph name to emit for ``context``.

    The empty string is returned for the dataset default graph, for the
    remembered default context, and whenever a plain ``Graph`` is being
    serialized. Otherwise the identifier is rendered as IRI text
    (URIRef) or in ``n3()`` form.

    :param context: a graph or a bare graph identifier.
    """
    if isinstance(context, Graph):
        context_identifier: IdentifiedNode = context.identifier
    else:
        context_identifier = context

    if context_identifier == DATASET_DEFAULT_GRAPH_ID:
        return ""

    default = self.default_context
    is_default = (
        isinstance(default, IdentifiedNode) and context_identifier == default
    ) or (isinstance(default, Graph) and context_identifier == default.identifier)
    if is_default:
        return ""

    if self.graph_type is Graph:
        # Only emit a context name when serializing a Dataset or ConjunctiveGraph
        return ""

    if isinstance(context_identifier, URIRef):
        return f"{context_identifier}"
    return context_identifier.n3()
|
||||
@@ -0,0 +1,433 @@
|
||||
"""
|
||||
This serialiser will output an RDF Graph as a JSON-LD formatted document. See:
|
||||
|
||||
http://json-ld.org/
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> from rdflib import Graph
|
||||
>>> testrdf = '''
|
||||
... @prefix dc: <http://purl.org/dc/terms/> .
|
||||
... <http://example.org/about>
|
||||
... dc:title "Someone's Homepage"@en .
|
||||
... '''
|
||||
|
||||
>>> g = Graph().parse(data=testrdf, format='n3')
|
||||
|
||||
>>> print(g.serialize(format='json-ld', indent=2))
|
||||
[
|
||||
{
|
||||
"@id": "http://example.org/about",
|
||||
"http://purl.org/dc/terms/title": [
|
||||
{
|
||||
"@language": "en",
|
||||
"@value": "Someone's Homepage"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
"""
|
||||
|
||||
# From: https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/serializer.py
|
||||
|
||||
# NOTE: This code writes the entire JSON object into memory before serialising,
|
||||
# but we should consider streaming the output to deal with arbitrarily large
|
||||
# graphs.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Dict, List, Optional
|
||||
|
||||
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph, _ObjectType
|
||||
from rdflib.namespace import RDF, XSD
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, URIRef
|
||||
|
||||
from ..shared.jsonld.context import UNDEF, Context
|
||||
from ..shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB
|
||||
from ..shared.jsonld.util import _HAS_ORJSON, json, orjson
|
||||
|
||||
__all__ = ["JsonLDSerializer", "from_rdf"]
|
||||
|
||||
|
||||
PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
|
||||
|
||||
|
||||
class JsonLDSerializer(Serializer):
|
||||
def __init__(self, store: Graph):
    """
    Create a JSON-LD serializer for ``store``.

    :param store: the graph that ``serialize`` will write out.
    """
    super(JsonLDSerializer, self).__init__(store)
|
||||
|
||||
def serialize(
    self,
    stream: IO[bytes],
    base: Optional[str] = None,
    encoding: Optional[str] = None,
    **kwargs: Any,
) -> None:
    """
    Convert the store to a JSON-LD structure and write it to ``stream``.

    :param stream: binary stream that receives the encoded document.
    :param base: base IRI handed to ``from_rdf``.
    :param encoding: output encoding, ``utf-8`` when falsy; a warning is
        issued for anything other than ``utf-8``/``utf-16``.
    :param kwargs: ``context``, ``use_native_types``, ``use_rdf_type`` and
        ``auto_compact`` are forwarded to ``from_rdf``; ``indent``,
        ``separators``, ``sort_keys`` and ``ensure_ascii`` control the
        JSON dump (``separators`` and ``ensure_ascii`` only apply to the
        standard ``json`` fallback).
    """
    encoding = encoding or "utf-8"
    if encoding not in ("utf-8", "utf-16"):
        warnings.warn(
            "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
        )

    context_data = kwargs.get("context")
    # A trailing comma used to wrap this value in a one-element tuple, which
    # is always truthy, so the caller's choice was silently ignored.
    use_native_types = kwargs.get("use_native_types", False)
    use_rdf_type = kwargs.get("use_rdf_type", False)
    auto_compact = kwargs.get("auto_compact", False)

    indent = kwargs.get("indent", 2)
    separators = kwargs.get("separators", (",", ": "))
    sort_keys = kwargs.get("sort_keys", True)
    ensure_ascii = kwargs.get("ensure_ascii", False)

    obj = from_rdf(
        self.store,
        context_data,
        base,
        use_native_types,
        use_rdf_type,
        auto_compact=auto_compact,
    )
    if _HAS_ORJSON:
        option: int = orjson.OPT_NON_STR_KEYS
        if indent is not None:
            # orjson only supports a fixed two-space indent
            option |= orjson.OPT_INDENT_2
        if sort_keys:
            option |= orjson.OPT_SORT_KEYS
        if ensure_ascii:
            warnings.warn("Cannot use ensure_ascii with orjson")
        data_bytes = orjson.dumps(obj, option=option)
        if encoding != "utf-8":
            # orjson always emits UTF-8; honour the requested encoding the
            # same way the json fallback below does.
            data_bytes = data_bytes.decode("utf-8").encode(encoding, "replace")
        stream.write(data_bytes)
    else:
        data = json.dumps(
            obj,
            indent=indent,
            separators=separators,
            sort_keys=sort_keys,
            ensure_ascii=ensure_ascii,
        )
        stream.write(data.encode(encoding, "replace"))
|
||||
|
||||
|
||||
def from_rdf(
    graph,
    context_data=None,
    base=None,
    use_native_types=False,
    use_rdf_type=False,
    auto_compact=False,
    startnode=None,
    index=False,
):
    """
    Convert ``graph`` into a JSON-LD structure of dicts and lists.

    :param graph: the graph to convert.
    :param context_data: a ``Context`` or the raw data to build one from.
    :param base: base IRI used when a ``Context`` has to be built.
    :param use_native_types: forwarded to ``Converter``.
    :param use_rdf_type: forwarded to ``Converter``.
    :param auto_compact: when no context is given, derive one from the
        graph's prefixed namespaces (the XML namespace is skipped).
    :param startnode: currently unused.
    :param index: currently unused.
    :return: the converted structure; with an active context it is a dict
        that also carries the context under ``CONTEXT``.
    """
    # TODO: support for index and startnode

    if not context_data and auto_compact:
        context_data = {
            pfx: str(ns)
            for (pfx, ns) in graph.namespaces()
            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
        }

    if not isinstance(context_data, Context):
        context = Context(context_data, base=base)
    else:
        context = context_data
        context_data = context.to_dict()

    converter = Converter(context, use_native_types, use_rdf_type)
    result = converter.convert(graph)

    if not converter.context.active:
        return result

    if isinstance(result, list):
        result = {context.get_key(GRAPH): result}
    result[CONTEXT] = context_data
    return result
|
||||
|
||||
|
||||
class Converter:
|
||||
def __init__(self, context: Context, use_native_types: bool, use_rdf_type: bool):
    """
    Store the conversion settings.

    :param context: the JSON-LD context used to compact terms.
    :param use_native_types: emit native JSON values for plain literal
        types; this is also switched on whenever ``context.active`` is
        truthy.
    :param use_rdf_type: keep ``rdf:type`` as an ordinary property.
    """
    self.use_rdf_type = use_rdf_type
    self.use_native_types = context.active or use_native_types
    self.context = context
|
||||
|
||||
def convert(self, graph: Graph):
    """
    Convert ``graph`` (or every context of a context-aware graph).

    For a context-aware graph the contexts with a URIRef identifier are
    kept as named graphs, while the triples of all other contexts are
    added to the default graph.

    :param graph: the graph or dataset to convert.
    :return: a list of node/graph objects, or a single object when a
        context is active and only one object was produced.
    """
    # TODO: bug in rdflib dataset parsing (nquads et al):
    # plain triples end up in separate unnamed graphs (rdflib issue #436)
    if not graph.context_aware:
        graphs = [graph]
        default_graph_id = graph.identifier
    else:
        # type error: "Graph" has no attribute "contexts"
        all_contexts = list(graph.contexts())  # type: ignore[attr-defined]
        has_dataset_default_id = any(
            c.identifier == DATASET_DEFAULT_GRAPH_ID for c in all_contexts
        )
        if (
            has_dataset_default_id
            # type error: "Graph" has no attribute "default_context"
            and graph.default_context.identifier == DATASET_DEFAULT_GRAPH_ID  # type: ignore[attr-defined]
        ):
            default_graph = graph.default_context  # type: ignore[attr-defined]
        else:
            default_graph = Graph()
        graphs = [default_graph]
        default_graph_id = default_graph.identifier

        for candidate in all_contexts:
            if candidate in graphs:
                continue
            if isinstance(candidate.identifier, URIRef):
                graphs.append(candidate)
            else:
                # unnamed context: fold its triples into the default graph
                default_graph += candidate

    context = self.context

    objs: List[Any] = []
    for g in graphs:
        obj = {}
        graphname = None

        if isinstance(g.identifier, URIRef) and g.identifier != default_graph_id:
            graphname = context.shrink_iri(g.identifier)
            obj[context.id_key] = graphname

        nodes = self.from_graph(g)

        if not graphname and len(nodes) == 1:
            obj.update(nodes[0])
        elif not nodes:
            continue
        else:
            obj[context.graph_key] = nodes

        if objs and objs[0].get(context.get_key(ID)) == graphname:
            objs[0].update(obj)
        else:
            objs.append(obj)

    if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
        default = objs[0]
        items = default.get(context.graph_key)
        if len(default) == 1 and items:
            # unwrap a lone graph container into its node list
            objs = items
    elif len(objs) == 1 and self.context.active:
        objs = objs[0]

    return objs
|
||||
|
||||
def from_graph(self, graph: Graph):
    """
    Build the list of top-level node objects for one graph.

    :param graph: the graph whose subjects are converted.
    :return: the values of the node map filled by ``process_subject``.
    """
    nodemap: Dict[Any, Any] = {}

    for subject in set(graph.subjects()):
        ## only iri:s and unreferenced (rest will be promoted to top if needed)
        if isinstance(subject, URIRef):
            wanted = True
        elif isinstance(subject, BNode):
            wanted = not any(graph.subjects(None, subject))
        else:
            wanted = False
        if wanted:
            self.process_subject(graph, subject, nodemap)

    return list(nodemap.values())
|
||||
|
||||
def process_subject(self, graph: Graph, s: IdentifiedNode, nodemap):
    """
    Create the node object for subject ``s`` and register it in ``nodemap``.

    :param graph: the graph the subject's statements are read from.
    :param s: the subject to convert.
    :param nodemap: mapping of node id to node object, updated in place.
    :return: the new node object, or ``None`` when the id is already in
        ``nodemap``.
    """
    if isinstance(s, URIRef):
        node_id = self.context.shrink_iri(s)
    elif isinstance(s, BNode):
        node_id = s.n3()
    else:
        # This does not seem right, this probably should be an error.
        node_id = None

    # used_as_object = any(graph.subjects(None, s))
    if node_id in nodemap:
        return None

    # Register before walking the properties so that recursive references
    # back to this subject see it as already processed.
    node = {self.context.id_key: node_id}
    nodemap[node_id] = node

    for pred, obj in graph.predicate_objects(s):
        # type error: Argument 3 to "add_to_node" of "Converter" has incompatible type "Node"; expected "IdentifiedNode"
        # type error: Argument 4 to "add_to_node" of "Converter" has incompatible type "Node"; expected "Identifier"
        self.add_to_node(graph, s, pred, obj, node, nodemap)  # type: ignore[arg-type]

    return node
|
||||
|
||||
def add_to_node(
    self,
    graph: Graph,
    s: IdentifiedNode,
    p: IdentifiedNode,
    o: Identifier,
    s_node: Dict[str, Any],
    nodemap,
):
    """
    Add the statement ``s p o`` to the node object ``s_node``.

    The context is searched for a term matching the predicate and the
    object's datatype/language (literals) or coercion/container
    (other nodes). The object is then coerced, collected into a list,
    language map or set according to that term, or converted with
    ``to_raw_value`` as a fallback.

    :param graph: the graph being converted.
    :param s: subject of the statement.
    :param p: predicate of the statement.
    :param o: object of the statement.
    :param s_node: node object of the subject, updated in place.
    :param nodemap: mapping of node id to node object.
    """
    context = self.context

    # Bound up front so the language-map test below is safe for
    # non-literal objects as well.
    language = None
    if isinstance(o, Literal):
        datatype = str(o.datatype) if o.datatype else None
        language = o.language
        term = context.find_term(str(p), datatype, language=language)
    else:
        # ``is not None`` rather than truthiness: a list whose first item
        # is a falsy literal (0, false, "") is still a list.
        is_list_head = graph.value(o, RDF.first) is not None
        containers = [LIST, None] if is_list_head else [None]
        for container in containers:
            for coercion in (ID, VOCAB, UNDEF):
                # type error: Argument 2 to "find_term" of "Context" has incompatible type "object"; expected "Union[str, Defined, None]"
                # type error: Argument 3 to "find_term" of "Context" has incompatible type "Optional[str]"; expected "Union[Defined, str]"
                term = context.find_term(str(p), coercion, container)  # type: ignore[arg-type]
                if term:
                    break
            if term:
                break

    node = None
    use_set = not context.active

    if term:
        p_key = term.name

        if term.type:
            node = self.type_coerce(o, term.type)
        # type error: "Identifier" has no attribute "language"
        elif term.language and o.language == term.language:  # type: ignore[attr-defined]
            node = str(o)
        # type error: Right operand of "and" is never evaluated
        elif context.language and (term.language is None and o.language is None):  # type: ignore[unreachable]
            node = str(o)  # type: ignore[unreachable]

        if LIST in term.container:
            node = [
                self.type_coerce(v, term.type)
                or self.to_raw_value(graph, s, v, nodemap)
                for v in self.to_collection(graph, o)
            ]
        elif LANG in term.container and language:
            value = s_node.setdefault(p_key, {})
            values = value.get(language)
            node = str(o)
            if values or SET in term.container:
                if not isinstance(values, list):
                    # No previous value must start an empty list; wrapping
                    # it unconditionally produced ``[None, node]``.
                    values = [] if values is None else [values]
                    value[language] = values
                values.append(node)
            else:
                value[language] = node
            return
        elif SET in term.container:
            use_set = True

    else:
        p_key = context.to_symbol(p)
        # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
        key_term = context.terms.get(p_key)
        if key_term and (key_term.type or key_term.container):
            p_key = p
        if not term and p == RDF.type and not self.use_rdf_type:
            if isinstance(o, URIRef):
                node = context.to_symbol(o)
            p_key = context.type_key

    if node is None:
        node = self.to_raw_value(graph, s, o, nodemap)

    value = s_node.get(p_key)
    if value:
        if not isinstance(value, list):
            value = [value]
        value.append(node)
    elif use_set:
        value = [node]
    else:
        value = node
    s_node[p_key] = value
|
||||
|
||||
def type_coerce(self, o: Identifier, coerce_type: str):
    """
    Return the compact value of ``o`` under the term type ``coerce_type``.

    :param o: the object to coerce.
    :param coerce_type: ``ID``, ``VOCAB`` or a datatype IRI string.
    :return: a shrunk IRI / blank node label / ``o`` itself for ``ID``,
        a symbol for ``VOCAB`` with a URIRef, the literal itself when its
        datatype equals ``coerce_type``, otherwise ``None``.
    """
    if coerce_type == ID:
        if isinstance(o, URIRef):
            return self.context.shrink_iri(o)
        if isinstance(o, BNode):
            return o.n3()
        return o

    if coerce_type == VOCAB and isinstance(o, URIRef):
        return self.context.to_symbol(o)

    if isinstance(o, Literal) and str(o.datatype) == coerce_type:
        return o

    return None
|
||||
|
||||
def to_raw_value(
    self, graph: Graph, s: IdentifiedNode, o: Identifier, nodemap: Dict[str, Any]
):
    """
    Convert the object ``o`` to its uncoerced JSON-LD form.

    :param graph: the graph being converted.
    :param s: the subject that refers to ``o``.
    :param o: the object to convert.
    :param nodemap: mapping of node id to node object; blank node
        objects are registered here.
    :return: a list object for RDF collections, an id reference for
        blank nodes and IRIs, a value object or bare value for
        literals, and ``None`` for any other kind of object.
    """
    context = self.context
    # Walk the collection once and reuse the result; it used to be
    # traversed a second time just to iterate over the members.
    coll = self.to_collection(graph, o)
    if coll is not None:
        return {
            context.list_key: [
                self.to_raw_value(graph, s, lo, nodemap) for lo in coll
            ]
        }
    elif isinstance(o, BNode):
        # TODO: embed the node (self.context.active or using startnode and
        # only one ref); until then blank nodes are always referenced by id.
        onode = self.process_subject(graph, o, nodemap)
        if onode:
            nodemap[onode[context.id_key]] = onode
        return {context.id_key: o.n3()}
    elif isinstance(o, URIRef):
        # TODO: embed if o != startnode (else reverse)
        return {context.id_key: context.shrink_iri(o)}
    elif isinstance(o, Literal):
        # TODO: if compact
        native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
        if native:
            v = o.toPython()
        else:
            v = str(o)
        if o.datatype:
            if native and self.context.active:
                return v
            return {
                context.type_key: context.to_symbol(o.datatype),
                context.value_key: v,
            }
        elif o.language and o.language != context.language:
            return {context.lang_key: o.language, context.value_key: v}
        # type error: Right operand of "and" is never evaluated
        elif (not context.active) or (context.language and not o.language):  # type: ignore[unreachable]
            return {context.value_key: v}
        else:
            return v
|
||||
|
||||
def to_collection(self, graph: Graph, l_: Identifier):
    """
    Return the members of the RDF collection starting at ``l_``.

    :param graph: the graph holding the ``rdf:first``/``rdf:rest`` chain.
    :param l_: the candidate list head.
    :return: the list of members, or ``None`` when ``l_`` is not a
        well-formed collection: no ``rdf:first``, a URIRef list node,
        extra properties on a list node, a cycle, or a chain that does
        not end in ``rdf:nil``.
    """
    # Compare with None instead of relying on truthiness: literals such as
    # 0, false or "" are falsy but perfectly valid list members.
    if l_ != RDF.nil and graph.value(l_, RDF.first) is None:
        return None
    list_nodes: List[Optional[_ObjectType]] = []
    chain = {l_}
    while l_:
        if l_ == RDF.nil:
            return list_nodes
        if isinstance(l_, URIRef):
            return None
        first, rest = None, None
        for p, o in graph.predicate_objects(l_):
            if first is None and p == RDF.first:
                first = o
            elif rest is None and p == RDF.rest:
                rest = o
            elif p != RDF.type or o != RDF.List:
                # any other statement disqualifies the node as a list cell
                return None
        list_nodes.append(first)
        # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Identifier")
        l_ = rest  # type: ignore[assignment]
        if l_ in chain:
            # cyclic rdf:rest chain
            return None
        chain.add(l_)
|
||||
+326
@@ -0,0 +1,326 @@
|
||||
"""
|
||||
LongTurtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
|
||||
This variant, longturtle as opposed to just turtle, makes some small format changes
|
||||
to turtle - the original turtle serializer. It:
|
||||
|
||||
* uses PREFIX instead of @prefix
|
||||
* uses BASE instead of @base
|
||||
* adds a new line at RDF.type, or 'a'
|
||||
* adds a newline and an indent for all triples with more than one object (object list)
|
||||
* adds a new line and ';' for the last triple in a set with '.'
|
||||
on the start of the next line
|
||||
* uses the default encoding (encode()) instead of "latin-1"
|
||||
|
||||
- Nicholas Car, 2023
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.compare import to_canonical_graph
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
from .turtle import RecursiveSerializer
|
||||
|
||||
__all__ = ["LongTurtleSerializer"]
|
||||
|
||||
SUBJECT = 0
|
||||
VERB = 1
|
||||
OBJECT = 2
|
||||
|
||||
_GEN_QNAME_FOR_DT = False
|
||||
_SPACIOUS_OUTPUT = False
|
||||
|
||||
|
||||
class LongTurtleSerializer(RecursiveSerializer):
|
||||
short_name = "longturtle"
|
||||
indentString = " "
|
||||
|
||||
def __init__(self, store):
    """
    Prepare a deterministic copy of ``store`` for serialization.

    The store is canonicalised, dumped as N-Triples, its lines are
    sorted, and the result is parsed back (skolemized, then
    de-skolemized) into the graph that is actually serialized.

    :param store: the graph to serialize.
    """
    self._ns_rewrite = {}
    canonical = to_canonical_graph(store)
    ntriples = canonical.serialize(format="application/n-triples")
    ordered_lines = sorted(ntriples.split("\n"))
    graph = Graph()
    graph.parse(
        data="\n".join(ordered_lines),
        format="application/n-triples",
        skolemize=True,
    )
    graph = graph.de_skolemize()
    super(LongTurtleSerializer, self).__init__(graph)
    self.keywords = {RDF.type: "a"}
    self.reset()
    self.stream = None
    self._spacious: bool = _SPACIOUS_OUTPUT
|
||||
|
||||
def addNamespace(self, prefix, namespace):
    """
    Register ``prefix`` for ``namespace``, rewriting unusable prefixes.

    :param prefix: the requested prefix.
    :param namespace: the namespace IRI.
    :return: the prefix that was actually registered.
    """
    # Turtle does not support prefixes that start with _
    # if they occur in the graph, rewrite to p_blah
    # this is more complicated since we need to make sure p_blah
    # does not already exist. And we register namespaces as we go, i.e.
    # we may first see a triple with prefix _9 - rewrite it to p_9
    # and then later find a triple with a "real" p_9 prefix

    # so we need to keep track of ns rewrites we made so far.
    needs_rewrite = (prefix > "" and prefix[0] == "_") or (
        self.namespaces.get(prefix, namespace) != namespace
    )
    if needs_rewrite and prefix not in self._ns_rewrite:
        candidate = "p" + prefix
        while candidate in self.namespaces:
            candidate = "p" + candidate
        self._ns_rewrite[prefix] = candidate

    # Applied unconditionally so an earlier rewrite stays in effect.
    prefix = self._ns_rewrite.get(prefix, prefix)

    super(LongTurtleSerializer, self).addNamespace(prefix, namespace)
    return prefix
|
||||
|
||||
def reset(self):
    """Reset the base class state and this serializer's own bookkeeping."""
    super(LongTurtleSerializer, self).reset()
    self._ns_rewrite = {}
    self._started = False
    self._shortNames = {}
|
||||
|
||||
def serialize(
    self,
    stream: IO[bytes],
    base: Optional[str] = None,
    encoding: Optional[str] = None,
    spacious: Optional[bool] = None,
    **kwargs: Any,
) -> None:
    """
    Write the graph to ``stream`` in longturtle syntax.

    :param stream: binary stream that receives the output.
    :param base: base IRI; when ``None`` the store's own base is used if
        it has one.
    :param encoding: accepted for interface compatibility, not used here.
    :param spacious: when given, overrides the spacious-output setting.
    :param kwargs: accepted for interface compatibility, unused.
    """
    self.reset()
    self.stream = stream
    # if base is given here, use, if not and a base is set for the graph use that
    if base is not None:
        self.base = base
    elif self.store.base is not None:
        self.base = self.store.base

    if spacious is not None:
        self._spacious = spacious

    self.preprocess()
    subjects_list = self.orderSubjects()

    self.startDocument()

    for subject in subjects_list:
        if self.isDone(subject):
            continue
        # A newline follows every statement that was written. The former
        # ``firstTime`` flag was always False by the time it was tested,
        # so it never suppressed anything and has been dropped.
        if self.statement(subject):
            self.write("\n")

    self.endDocument()

    self.base = None
|
||||
|
||||
def preprocessTriple(self, triple):
    """
    Register the namespaces used by ``triple`` before anything is written.

    :param triple: the ``(subject, predicate, object)`` to inspect.
    """
    super(LongTurtleSerializer, self).preprocessTriple(triple)
    for position, node in enumerate(triple):
        if node in self.keywords:
            continue
        # Don't use generated prefixes for subjects and objects
        allow_generated = position == VERB
        self.getQName(node, gen_prefix=allow_generated)
        if isinstance(node, Literal) and node.datatype:
            self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
    predicate = triple[1]
    if isinstance(predicate, BNode):  # hmm - when is P ever a bnode?
        self._references[predicate] += 1
|
||||
|
||||
def getQName(self, uri, gen_prefix=True):
    """
    Return ``prefix:local`` for ``uri`` and register the prefix.

    :param uri: the term to abbreviate.
    :param gen_prefix: passed to ``compute_qname`` as ``generate``.
    :return: the qualified name, or ``None`` when ``uri`` is not a
        URIRef, no prefix can be found, or the local part ends in ".".
    """
    if not isinstance(uri, URIRef):
        return None

    try:
        prefix, namespace, local = self.store.compute_qname(
            uri, generate=gen_prefix
        )
    except Exception:
        # is the uri a namespace in itself?
        known_prefix = self.store.store.prefix(uri)
        if known_prefix is None:
            # nothing worked
            return None
        prefix, namespace, local = known_prefix, uri, ""

    # QName cannot end with .
    if local.endswith("."):
        return None

    prefix = self.addNamespace(prefix, namespace)

    return "%s:%s" % (prefix, local)
|
||||
|
||||
def startDocument(self):
    """
    Write the ``BASE`` line (if a base is set) and the sorted ``PREFIX``
    lines, followed by a blank line in spacious mode when any prefix was
    written.
    """
    self._started = True
    declarations = sorted(self.namespaces.items())

    if self.base:
        self.write(self.indent() + "BASE <%s>\n" % self.base)
    for declaration in declarations:
        self.write(self.indent() + "PREFIX %s: <%s>\n" % declaration)
    if declarations and self._spacious:
        self.write("\n")
|
||||
|
||||
def endDocument(self):
    """Write a trailing newline when spacious output is enabled."""
    if not self._spacious:
        return
    self.write("\n")
|
||||
|
||||
def statement(self, subject):
    """
    Mark ``subject`` as done and write its statement.

    :return: the result of ``s_squared`` if truthy, else of ``s_default``.
    """
    self.subjectDone(subject)
    written = self.s_squared(subject)
    if not written:
        written = self.s_default(subject)
    return written
|
||||
|
||||
def s_default(self, subject):
    """
    Write ``subject`` on its own line, its predicate list below it, and
    the closing "." on a line of its own.

    :return: always ``True``.
    """
    line_start = "\n" + self.indent()
    self.write(line_start)
    self.path(subject, SUBJECT)
    self.write("\n" + self.indent())
    self.predicateList(subject)
    self.write("\n.")
    return True
|
||||
|
||||
def s_squared(self, subject):
    """
    Write an unreferenced blank node subject using the ``[]`` form.

    :return: ``False`` (nothing written) when ``subject`` is referenced
        somewhere or is not a BNode, otherwise ``True``.
    """
    if (self._references[subject] > 0) or not isinstance(subject, BNode):
        return False
    opening = "\n" + self.indent() + "[]"
    self.write(opening)
    self.predicateList(subject, newline=False)
    self.write("\n.")
    return True
|
||||
|
||||
def path(self, node, position, newline=False):
    """
    Write ``node`` at ``position``, trying ``p_squared`` then ``p_default``.

    :raises Error: when neither writer reports success.
    """
    if self.p_squared(node, position):
        return
    if self.p_default(node, position, newline):
        return
    raise Error("Cannot serialize node '%s'" % (node,))
|
||||
|
||||
def p_default(self, node, position, newline=False):
    """
    Write the label of ``node``, preceded by a space unless it is a
    subject or starts a new line.

    :return: always ``True``.
    """
    needs_space = position != SUBJECT and not newline
    if needs_space:
        self.write(" ")
    self.write(self.label(node, position))
    return True
|
||||
|
||||
def label(self, node, position):
    """
    Return the Turtle text for ``node`` at ``position``.

    ``rdf:nil`` becomes ``()``, keyword predicates use their keyword,
    literals are rendered through ``_literal_n3``, and every other node
    is relativized and shown as a qualified name or, failing that, in
    ``n3()`` form.
    """
    if node == RDF.nil:
        return "()"
    if position is VERB and node in self.keywords:
        return self.keywords[node]
    if not isinstance(node, Literal):
        relative = self.relativize(node)
        return self.getQName(relative, position == VERB) or relative.n3()
    return node._literal_n3(
        use_plain=True,
        qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
    )
|
||||
|
||||
def p_squared(
    self,
    node,
    position,
):
    """
    Write a blank node inline, as a ``( ... )`` list or a ``[ ... ]`` block.

    :return: ``False`` (nothing written) when ``node`` is not a BNode, is
        already serialized, is referenced more than once, or is in
        subject position; otherwise ``True``.
    """
    if not isinstance(node, BNode):
        return False
    if node in self._serialized or self._references[node] > 1:
        return False
    if position == SUBJECT:
        return False

    if not self.isValidList(node):
        # this is a Blank Node
        self.subjectDone(node)
        self.write("\n" + self.indent(1) + "[\n")
        self.depth += 1
        self.predicateList(node)
        self.depth -= 1
        self.write("\n" + self.indent(1) + "]")
    else:
        # this is a list
        self.depth += 2
        self.write(" (\n")
        self.depth -= 2
        self.doList(node)
        self.write("\n" + self.indent() + ")")

    return True
|
||||
|
||||
def isValidList(self, l_):
    """
    Checks if l_ is a valid RDF list, i.e. no nodes have other properties.

    :param l_: the candidate list head.
    :return: ``False`` when the head has no ``rdf:first`` (or looking it
        up fails), when any list node other than ``rdf:nil`` does not
        have exactly two statements, or when the ``rdf:rest`` chain is
        cyclic; ``True`` otherwise.
    """
    try:
        if self.store.value(l_, RDF.first) is None:
            return False
    except Exception:
        return False
    # Track visited nodes: without this a cyclic rdf:rest chain never
    # reaches a falsy value and the loop would spin forever.
    seen = set()
    while l_:
        if l_ in seen:
            return False
        seen.add(l_)
        if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
            return False
        l_ = self.store.value(l_, RDF.rest)
    return True
|
||||
|
||||
def doList(self, l_):
    """
    Write the members of the RDF list starting at ``l_``, one per line.

    List nodes that carry an ``rdf:first`` value are marked as done.
    """
    position = 0
    while l_:
        member = self.store.value(l_, RDF.first)
        if member is not None:
            # only the very first list cell skips the leading line break
            lead = self.indent(1) if position == 0 else "\n" + self.indent(1)
            self.write(lead)
            self.path(member, OBJECT, newline=True)
            self.subjectDone(l_)
        l_ = self.store.value(l_, RDF.rest)
        position += 1
|
||||
|
||||
def predicateList(self, subject, newline=False):
    """
    Write every predicate of ``subject`` with its objects, each on its own
    indented line and each terminated by " ;".

    :param subject: the subject whose properties are written.
    :param newline: accepted but not used by this method.
    """
    properties = self.buildPredicateHash(subject)
    ordered = self.sortProperties(properties)
    if not ordered:
        return
    for index, predicate in enumerate(ordered):
        if index == 0:
            self.write(self.indent(1))
        else:
            self.write(" ;\n" + self.indent(1))
        self.verb(predicate, newline=True)
        self.objectList(properties[predicate])
    self.write(" ;")
|
||||
|
||||
def verb(self, node, newline=False):
    """Write ``node`` in predicate position."""
    self.path(node, VERB, newline)
|
||||
|
||||
def objectList(self, objects):
    """
    Write the objects of one predicate, separated by " ,".

    With more than one object, every non-blank-node object is placed on
    its own indented line.

    :param objects: the objects to write; nothing happens when empty.
    """
    count = len(objects)
    if count == 0:
        return
    # NOTE(review): the original expression ``(count == 1) and 0 or 1``
    # yields 1 for every count, because the intended 0 is falsy. The
    # value is kept at 1 so that the emitted indentation does not change.
    depthmod = 1
    self.depth += depthmod

    head, tail = objects[0], objects[1:]
    several = count > 1
    if several:
        if isinstance(head, BNode):
            self.write(" ")
        else:
            self.write("\n" + self.indent(1))
    self.path(head, OBJECT, newline=several)

    for obj in tail:
        self.write(" ,")
        if not isinstance(obj, BNode):
            self.write("\n" + self.indent(1))
        self.path(obj, OBJECT, newline=True)

    self.depth -= depthmod
|
||||
@@ -0,0 +1,91 @@
|
||||
"""
|
||||
Notation 3 (N3) RDF graph serializer for RDFLib.
|
||||
"""
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import OWL, Namespace
|
||||
from rdflib.plugins.serializers.turtle import OBJECT, SUBJECT, TurtleSerializer
|
||||
|
||||
__all__ = ["N3Serializer"]
|
||||
|
||||
SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
|
||||
|
||||
|
||||
class N3Serializer(TurtleSerializer):
    """Notation 3 serializer built on top of the Turtle serializer.

    In addition to the inherited behaviour it writes graph-valued terms as
    nested ``{ ... }`` blocks (using a child serializer that shares this
    one's stream) and registers the keyword shorthands ``=`` for
    ``owl:sameAs`` and ``=>`` for ``log:implies``.
    """

    short_name = "n3"

    def __init__(self, store: Graph, parent=None):
        """
        :param store: the graph to serialize.
        :param parent: the serializer writing the enclosing graph when this
            instance serializes a nested graph; ``None`` at the top level.
        """
        super().__init__(store)
        n3_keywords = {OWL.sameAs: "=", SWAP_LOG.implies: "=>"}
        self.keywords.update(n3_keywords)
        self.parent = parent

    def reset(self):
        """Reset the inherited state and clear the ``_stores`` mapping."""
        super().reset()
        self._stores = {}

    def endDocument(self):  # noqa: N802
        """Finish the document; a nested (child) serializer does nothing."""
        if self.parent:
            return
        super().endDocument()

    def indent(self, modifier=0):
        """Return this serializer's indent, prefixed-on by the parent's.

        The parent's indent is requested without ``modifier`` so the modifier
        is applied only once, by this serializer.
        """
        own = super().indent(modifier)
        if self.parent is None:
            return own
        return own + self.parent.indent()

    def preprocessTriple(self, triple):  # noqa: N802
        """Preprocess ``triple`` and, recursively, every triple of any graph
        that appears as its subject, predicate or object."""
        super().preprocessTriple(triple)
        for term in (triple[0], triple[1], triple[2]):
            if not isinstance(term, Graph):
                continue
            for inner in term:
                self.preprocessTriple(inner)

    def getQName(self, uri, gen_prefix=True):  # noqa: N802
        """Return the qname for ``uri``, asking the parent serializer first.

        Falls back to the inherited lookup when there is no parent or the
        parent returns ``None``.
        """
        if self.parent is not None:
            inherited = self.parent.getQName(uri, gen_prefix)
            if inherited is not None:
                return inherited
        return super().getQName(uri, gen_prefix)

    def statement(self, subject):
        """Write the statement(s) for ``subject``.

        The subject is marked done first. Returns ``False`` when it has no
        predicates; otherwise the result of ``s_clause`` or, if that wrote
        nothing, of the inherited ``statement``.
        """
        self.subjectDone(subject)
        if len(self.buildPredicateHash(subject)) == 0:
            return False
        if self.s_clause(subject):
            return True
        return super().statement(subject)

    def path(self, node, position, newline=False):
        """Write ``node``: as a ``{ ... }`` block if it is a graph, otherwise
        through the inherited ``path``."""
        if self.p_clause(node, position):
            return
        super().path(node, position, newline)

    def s_clause(self, subject):
        """Write a statement whose subject is a graph.

        Returns ``True`` if ``subject`` is a ``Graph`` (and was written),
        ``False`` otherwise.
        """
        if not isinstance(subject, Graph):
            return False
        self.write("\n" + self.indent())
        self.p_clause(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def p_clause(self, node, position):
        """Write ``node`` as a nested ``{ ... }`` block if it is a graph.

        The nested graph is serialized by a child ``N3Serializer`` onto this
        serializer's stream, one depth level deeper. Returns ``True`` if a
        block was written, ``False`` if ``node`` is not a ``Graph``.
        """
        if not isinstance(node, Graph):
            return False
        self.subjectDone(node)
        if position is OBJECT:
            self.write(" ")
        self.write("{")
        self.depth += 1
        nested = N3Serializer(node, parent=self)
        # type error: Argument 1 to "serialize" of "TurtleSerializer" has incompatible type "Optional[IO[bytes]]"; expected "IO[bytes]"
        nested.serialize(self.stream)  # type: ignore[arg-type]
        self.depth -= 1
        self.write(self.indent() + "}")
        return True
|
||||
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.nt import _quoteLiteral
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
__all__ = ["NQuadsSerializer"]
|
||||
|
||||
|
||||
class NQuadsSerializer(Serializer):
    """Serializes the contexts of a context-aware store as N-Quads rows."""

    def __init__(self, store: Graph):
        """
        :param store: a context-aware graph.
        :raises Exception: if ``store`` is not context-aware.
        """
        if not store.context_aware:
            raise Exception(
                "NQuads serialization only makes sense for context-aware stores!"
            )

        super().__init__(store)
        self.store: ConjunctiveGraph

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write one row per triple of every context, then a final newline.

        :param stream: binary stream that receives the rows.
        :param base: unsupported; a warning is issued when given.
        :param encoding: ignored in favour of ``self.encoding``; a warning is
            issued when a different encoding is requested.
        :param kwargs: accepted for interface compatibility, unused.
        """
        if base is not None:
            warnings.warn("NQuadsSerializer does not support base.")

        custom_encoding = (
            encoding is not None and encoding.lower() != self.encoding.lower()
        )
        if custom_encoding:
            warnings.warn(
                "NQuadsSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )

        encoding = self.encoding
        for context in self.store.contexts():
            for triple in context:
                row = _nq_row(triple, context.identifier)
                stream.write(row.encode(encoding, "replace"))
        stream.write(b"\n")
|
||||
|
||||
|
||||
def _nq_row(triple, context):
    """Return one N-Quads row (terminated by `` .\\n``) for ``triple``.

    Subject, predicate and context are written with their ``n3()`` form; a
    ``Literal`` object is written through ``_quoteLiteral``, any other object
    through ``n3()``.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    obj = triple[2]
    if isinstance(obj, Literal):
        object_text = _quoteLiteral(obj)
    else:
        object_text = obj.n3()
    return f"{subject_text} {predicate_text} {object_text} {context.n3()} .\n"
|
||||
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import warnings
|
||||
from typing import IO, TYPE_CHECKING, Any, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _TripleType
|
||||
|
||||
"""
|
||||
N-Triples RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
|
||||
format.
|
||||
"""
|
||||
|
||||
__all__ = ["NTSerializer"]
|
||||
|
||||
|
||||
class NTSerializer(Serializer):
    """
    Serializes RDF graphs to NTriples format.
    """

    def __init__(self, store: Graph):
        Serializer.__init__(self, store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = "utf-8",
        **kwargs: Any,
    ) -> None:
        """Write every triple of the store to ``stream`` as UTF-8 rows.

        :param stream: binary stream that receives the rows.
        :param base: unsupported; a warning is issued when given.
        :param encoding: output is always UTF-8. A warning is issued only
            when a *different* encoding is explicitly requested; ``None``
            and case/spelling variants of UTF-8 are accepted silently.
        :param kwargs: accepted for interface compatibility, unused.
        """
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        # Compare case-insensitively and tolerate ``None`` so that callers
        # which pass no encoding (or "UTF-8") do not get a misleading warning.
        if encoding is not None and encoding.lower() not in ("utf-8", "utf8"):
            warnings.warn(
                "NTSerializer always uses UTF-8 encoding. "
                f"Given encoding was: {encoding}"
            )

        for triple in self.store:
            stream.write(_nt_row(triple).encode("utf-8"))
|
||||
|
||||
|
||||
class NT11Serializer(NTSerializer):
    """
    Serializer for the RDF 1.1 N-Triples format.

    Adds nothing to ``NTSerializer``: rows are produced the same way and are
    UTF-8 encoded.
    """

    def __init__(self, store: Graph):
        # Initialise through the base ``Serializer`` directly; its default
        # encoding is utf-8.
        Serializer.__init__(self, store)
|
||||
|
||||
|
||||
def _nt_row(triple: _TripleType) -> str:
    """Return one N-Triples row (terminated by `` .\\n``) for ``triple``.

    Subject and predicate are written with their ``n3()`` form; a ``Literal``
    object is written through ``_quoteLiteral``, any other object through
    ``n3()``.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    obj = triple[2]
    if isinstance(obj, Literal):
        object_text = _quoteLiteral(obj)
    else:
        object_text = obj.n3()
    return f"{subject_text} {predicate_text} {object_text} .\n"
|
||||
|
||||
|
||||
def _quoteLiteral(l_: Literal) -> str:  # noqa: N802
    """
    Render a literal for an N-Triples/N-Quads row (a simpler version of
    term.Literal.n3()).

    The lexical form is quoted and escaped by ``_quote_encode``; a language
    tag is appended as ``@lang`` or a datatype as ``^^<datatype>``.

    :raises Exception: if the literal carries both a language and a datatype.
    """
    quoted = _quote_encode(l_)

    if l_.language:
        if l_.datatype:
            raise Exception("Literal has datatype AND language!")
        return f"{quoted}@{l_.language}"
    if l_.datatype:
        return f"{quoted}^^<{l_.datatype}>"
    return quoted
|
||||
|
||||
|
||||
# Characters that must be backslash-escaped inside a quoted literal.
_QUOTE_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        "\n": "\\n",
        '"': '\\"',
        "\r": "\\r",
    }
)


def _quote_encode(l_: str) -> str:
    """Return ``l_`` wrapped in double quotes with ``\\``, ``"``, newline and
    carriage return backslash-escaped."""
    escaped = l_.translate(_QUOTE_ESCAPES)
    return f'"{escaped}"'
|
||||
|
||||
|
||||
def _nt_unicode_error_resolver(
    err: UnicodeError,
) -> Tuple[Union[str, bytes], int]:
    """
    Codec error handler that replaces unencodable characters as defined in
    https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings

    Each offending character becomes ``\\uXXXX`` (code points up to U+FFFF)
    or ``\\UXXXXXXXX`` (above), using upper-case hex digits.

    :param err: the error raised by the codec; its ``object``, ``start`` and
        ``end`` attributes identify the offending slice.
    :returns: the replacement text and the position at which to resume.
    """
    # type error: "UnicodeError" has no attribute "object"
    # type error: "UnicodeError" has no attribute "start"
    # type error: "UnicodeError" has no attribute "end"
    offending = err.object[err.start : err.end]  # type: ignore[attr-defined]

    pieces = []
    for char in offending:
        codepoint = ord(char)
        if codepoint <= 0xFFFF:
            pieces.append("\\u%04X" % codepoint)
        else:
            pieces.append("\\U%08X" % codepoint)

    # type error: "UnicodeError" has no attribute "end"
    return "".join(pieces), err.end  # type: ignore[attr-defined]


# Make the handler available to ``str.encode(..., errors="_rdflib_nt_escape")``.
codecs.register_error("_rdflib_nt_escape", _nt_unicode_error_resolver)
|
||||
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import IO, Any, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from rdflib import Dataset
|
||||
from rdflib.plugins.serializers.nquads import _nq_row
|
||||
from rdflib.plugins.serializers.nt import _nt_row
|
||||
from rdflib.serializer import Serializer
|
||||
|
||||
add_remove_methods = {"add": "A", "remove": "D"}
|
||||
|
||||
|
||||
class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Serialize the store to the given stream.

        The output consists of the header (``H id``, optionally ``H prev``),
        a ``TX .`` line, the add/delete rows and a closing ``TC .`` line.

        :param stream: The stream to serialize to.
        :param base: Not supported; a warning is issued when given.
        :param encoding: Not supported; ``self.encoding`` is always used and
            a warning is issued when a different encoding is requested.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
        - target: The target Dataset to compare against.
        NB: Only one of 'operation' or 'target' should be provided. When
        neither is given the operation defaults to 'add'.
        - header_id: The header ID to use (a ``uuid:`` ID is generated when
          missing or empty).
        - header_prev: The previous header ID to use.
        :raises AssertionError: if ``operation`` is given but is not one of
            the supported operations.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_id = kwargs.get("header_id") or f"uuid:{uuid4()}"
        header_prev = kwargs.get("header_prev")

        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # The check must run against the *requested* encoding, i.e. before
        # it is replaced by the serializer's own encoding below.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        def emit(text):
            stream.write(text.encode(encoding, "replace"))

        def write_header():
            emit(f"H id <{header_id}> .\n")
            if header_prev:
                # Header rows are terminated by " ." like every other row.
                emit(f"H prev <{header_prev}> .\n")
            emit("TX .\n")

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    emit(self._patch_row(triple, context.identifier, op_code))

        if operation:
            # Raised explicitly (rather than via ``assert``) so the check
            # also runs under ``python -O``.
            if operation not in add_remove_methods:
                raise AssertionError(f"Invalid operation: {operation}")
        elif target is None:
            # No operation specified and no target specified
            # Fall back to default operation of "add" to prevent a no-op
            operation = "add"

        write_header()
        if operation:
            write_triples(self.store.contexts(), add_remove_methods[operation])
        else:
            # ``target`` is tested against None (not truthiness) so that an
            # empty target Dataset still produces a diff.
            to_add, to_remove = self._diff(target)
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        emit("TC .\n")

    def _diff(self, target):
        """Return ``(rows_to_add, rows_to_remove)`` needed to turn the store
        into ``target``, computed with the graphs' ``-`` operator."""
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """Return one patch row: the operation code followed by a triple row
        for the store's default context, or a quad row for any other."""
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        else:
            return f"{operation} {_nq_row(triple, context_id)}"
|
||||
@@ -0,0 +1,391 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.dom.minidom
|
||||
from typing import IO, Any, Dict, Generator, Optional, Set, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.collection import Collection
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS, Namespace # , split_uri
|
||||
from rdflib.plugins.parsers.RDFVOC import RDFVOC
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, Node, URIRef
|
||||
from rdflib.util import first, more_than
|
||||
|
||||
from .xmlwriter import ESCAPE_ENTITIES
|
||||
|
||||
__all__ = ["fix", "XMLSerializer", "PrettyXMLSerializer"]
|
||||
|
||||
|
||||
class XMLSerializer(Serializer):
    """Serializes a graph as RDF/XML, writing one ``rdf:Description``
    element per subject with one child element per predicate/object pair."""

    def __init__(self, store: Graph):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self) -> Generator[Tuple[str, URIRef], None, None]:
        """Yield the ``(prefix, namespace)`` pairs needed for the store's
        predicates, always including the ``rdf`` prefix.

        :raises AssertionError: if ``rdf`` is bound to a namespace other than
            the RDF syntax namespace.
        """
        store = self.store
        nm = store.namespace_manager
        bindings: Dict[str, URIRef] = {}

        for predicate in set(store.predicates()):
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, _ = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            bindings[prefix] = URIRef(namespace)

        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")  # noqa: N806

        if "rdf" in bindings:
            # Raised explicitly so the check also runs under ``python -O``.
            if bindings["rdf"] != RDFNS:
                raise AssertionError(
                    "prefix 'rdf' is bound to %r, expected %r"
                    % (bindings["rdf"], RDFNS)
                )
        else:
            bindings["rdf"] = RDFNS

        yield from bindings.items()

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the store to ``stream`` as an ``rdf:RDF`` document.

        :param stream: binary stream that receives the document.
        :param base: base IRI; when ``None`` the store's base (if any) is
            used.
        :param encoding: not consulted; ``self.encoding`` is always used.
        :param kwargs: ``xml_base`` overrides the value written to the
            ``xml:base`` attribute.
        """
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base
        self.__stream = stream
        self.__serialized: Dict[Identifier, int] = {}
        encoding = self.encoding

        def write(uni):
            return stream.write(uni.encode(encoding, "replace"))

        self.write = write

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

        # startRDF
        write("<rdf:RDF\n")

        # If provided, write xml:base attribute for the RDF.
        # Attribute values go through quoteattr() so that characters such as
        # "&" or quotes in an IRI cannot produce malformed XML.
        if "xml_base" in kwargs:
            write(" xml:base=%s\n" % quoteattr(str(kwargs["xml_base"])))
        elif self.base:
            write(" xml:base=%s\n" % quoteattr(str(self.base)))
        # TODO:
        # assert(
        #    namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
        bindings = sorted(self.__bindings())

        for prefix, namespace in bindings:
            if prefix:
                write(" xmlns:%s=%s\n" % (prefix, quoteattr(str(namespace))))
            else:
                write(" xmlns=%s\n" % quoteattr(str(namespace)))
        write(">\n")

        # write out triples by subject
        for subject in self.store.subjects():
            # type error: Argument 1 to "subject" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
            self.subject(subject, 1)  # type: ignore[arg-type]

        # endRDF
        write("</rdf:RDF>\n")

        # Drop the bookkeeping so that the memory can get garbage collected.
        del self.__serialized

    def subject(self, subject: Identifier, depth: int = 1) -> None:
        """Write the ``rdf:Description`` element for ``subject`` once.

        Subjects already written during this ``serialize`` call, and
        subjects that are neither a ``BNode`` nor a ``URIRef``, produce no
        output.
        """
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        if not isinstance(subject, (BNode, URIRef)):
            return

        write = self.write
        indent = " " * depth
        element_name = "rdf:Description"

        if isinstance(subject, BNode):
            write('%s<%s rdf:nodeID="%s"' % (indent, element_name, subject))
        else:
            uri = quoteattr(self.relativize(subject))
            write("%s<%s rdf:about=%s" % (indent, element_name, uri))

        if (subject, None, None) in self.store:
            write(">\n")

            for predicate, object in self.store.predicate_objects(subject):
                # type error: Argument 1 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                # type error: Argument 2 to "predicate" of "XMLSerializer" has incompatible type "Node"; expected "Identifier"
                self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]
            write("%s</%s>\n" % (indent, element_name))

        else:
            write("/>\n")

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """Write one property element for ``predicate`` with value ``object``.

        Literals become element content (with ``xml:lang`` / ``rdf:datatype``
        attributes as applicable), blank nodes an ``rdf:nodeID`` reference
        and anything else an ``rdf:resource`` reference.
        """
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname_strict(predicate)

        if isinstance(object, Literal):
            attributes = ""

            # quoteattr() escapes the values so that e.g. an "&" in a
            # datatype IRI does not break the document.
            if object.language:
                attributes += " xml:lang=%s" % quoteattr(str(object.language))

            if object.datatype:
                attributes += " rdf:datatype=%s" % quoteattr(str(object.datatype))

            write(
                "%s<%s%s>%s</%s>\n"
                % (indent, qname, attributes, escape(object, ESCAPE_ENTITIES), qname)
            )
        elif isinstance(object, BNode):
            write('%s<%s rdf:nodeID="%s"/>\n' % (indent, qname, object))
        else:
            write(
                "%s<%s rdf:resource=%s/>\n"
                % (indent, qname, quoteattr(self.relativize(object)))
            )
|
||||
|
||||
|
||||
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
|
||||
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
|
||||
OWL_NS = Namespace("http://www.w3.org/2002/07/owl#")
|
||||
|
||||
|
||||
# TODO:
|
||||
def fix(val: str) -> str:
    """Return ``val`` without a leading ``_:``.

    The ``_:`` prefix is stripped from nodeIDs because it is not valid in an
    NCName; any other value is returned unchanged.
    """
    return val[2:] if val.startswith("_:") else val
|
||||
|
||||
|
||||
class PrettyXMLSerializer(Serializer):
    """Serializes a graph as abbreviated ("pretty") RDF/XML.

    Typed subjects are written as typed node elements, objects that are
    themselves subjects are nested up to ``max_depth`` levels, and RDF lists
    are written with ``rdf:parseType="Collection"``.
    """

    def __init__(self, store: Graph, max_depth=3):
        """
        :param store: the graph to serialize.
        :param max_depth: default nesting limit, used when ``serialize`` is
            called without a ``max_depth`` keyword argument.
        """
        super(PrettyXMLSerializer, self).__init__(store)
        self.forceRDFAbout: Set[URIRef] = set()
        self._default_max_depth = max_depth

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the store to ``stream`` as an ``rdf:RDF`` document.

        :param stream: binary stream that receives the document.
        :param base: base IRI; when ``None`` the store's base (if any) is
            used.
        :param encoding: passed to the ``XMLWriter``.
        :param kwargs: ``max_depth`` (must be > 0) limits how deeply objects
            are nested; ``xml_base`` overrides the ``xml:base`` value.
        :raises AssertionError: if ``max_depth`` is not greater than 0.
        """
        self.__serialized: Dict[Identifier, int] = {}
        store = self.store
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif store.base is not None:
            self.base = store.base
        self.max_depth = kwargs.get("max_depth", self._default_max_depth)
        # Raised explicitly so the check also runs under ``python -O``.
        if not self.max_depth > 0:
            raise AssertionError("max_depth must be greater than 0")

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)
        namespaces = {}

        possible: Set[Node] = set(store.predicates()).union(
            store.objects(None, RDF.type)
        )

        for predicate in possible:
            # type error: Argument 1 to "compute_qname_strict" of "NamespaceManager" has incompatible type "Node"; expected "str"
            prefix, namespace, _ = nm.compute_qname_strict(predicate)  # type: ignore[arg-type]
            namespaces[prefix] = namespace

        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

        writer.push(RDFVOC.RDF)

        if "xml_base" in kwargs:
            writer.attribute(XMLBASE, kwargs["xml_base"])
        elif self.base:
            writer.attribute(XMLBASE, self.base)

        writer.namespaces(namespaces.items())

        subject: IdentifiedNode
        # Write out subjects that can not be inline: those never used as an
        # object, or used as an object only of themselves.
        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if (None, None, subject) in store:
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()

        # type error: Incompatible types in assignment (expression has type "Node", variable has type "IdentifiedNode")
        for subject in store.subjects():  # type: ignore[assignment]
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)

        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # Must pass the blank node itself; passing the stale
                # ``subject`` loop variable silently dropped these nodes.
                self.subject(bnode, 1)

        writer.pop(RDFVOC.RDF)
        stream.write("\n".encode("latin-1"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None  # type: ignore[assignment]

    def subject(self, subject: Identifier, depth: int = 1):
        """Write the node element for ``subject``.

        A subject queued in ``forceRDFAbout`` is written as an empty
        ``rdf:Description`` carrying only ``rdf:about``. Otherwise the
        subject is written (once per ``serialize`` call) with all of its
        properties, using its first ``rdf:type`` as element name when that
        type has a qname.
        """
        store = self.store
        writer = self.writer

        if subject in self.forceRDFAbout:
            writer.push(RDFVOC.Description)
            writer.attribute(RDFVOC.about, self.relativize(subject))
            writer.pop(RDFVOC.Description)
            self.forceRDFAbout.remove(subject)  # type: ignore[arg-type]
            return

        if subject in self.__serialized:
            return

        self.__serialized[subject] = 1
        rdf_type = first(store.objects(subject, RDF.type))

        try:
            # Only used to find out whether the type can be written as a
            # qname; any failure means "fall back to rdf:Description".
            # type error: Argument 1 to "qname" of "NamespaceManager" has incompatible type "Optional[Node]"; expected "str"
            self.nm.qname(rdf_type)  # type: ignore[arg-type]
        except Exception:
            rdf_type = None

        element = rdf_type or RDFVOC.Description
        # type error: Argument 1 to "push" of "XMLWriter" has incompatible type "Node"; expected "str"
        writer.push(element)  # type: ignore[arg-type]

        if isinstance(subject, BNode):
            # The node ID is always written. Restricting it to blank nodes
            # referenced more than once (to avoid redundant identifiers) was
            # disabled in the previous code.
            writer.attribute(RDFVOC.nodeID, fix(subject))
        else:
            writer.attribute(RDFVOC.about, self.relativize(subject))

        if (subject, None, None) in store:
            for predicate, object in store.predicate_objects(subject):
                # The type used as the element name is not repeated as a
                # property.
                if not (predicate == RDF.type and object == rdf_type):
                    # type error: Argument 1 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                    # type error: Argument 2 to "predicate" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                    self.predicate(predicate, object, depth + 1)  # type: ignore[arg-type]

        # type error: Argument 1 to "pop" of "XMLWriter" has incompatible type "Node"; expected "Optional[str]"
        writer.pop(element)  # type: ignore[arg-type]

    def predicate(
        self, predicate: Identifier, object: Identifier, depth: int = 1
    ) -> None:
        """Write the property element for ``predicate`` with value ``object``.

        Literals become element content; objects that are already written or
        have no properties become references; RDF lists are written as
        collections; other objects are nested while ``depth`` allows it and
        referenced otherwise.
        """
        writer = self.writer
        store = self.store
        writer.push(predicate)

        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)

            if object.datatype == RDF.XMLLiteral and isinstance(
                object.value, xml.dom.minidom.Document
            ):
                writer.attribute(RDFVOC.parseType, "Literal")
                writer.text("")
                writer.stream.write(object)
            else:
                if object.datatype:
                    writer.attribute(RDFVOC.datatype, object.datatype)
                writer.text(object)

        elif (
            object in self.__serialized
            or not (object, None, None) in store  # noqa: E713
        ):
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDFVOC.nodeID, fix(object))
            else:
                writer.attribute(RDFVOC.resource, self.relativize(object))

        elif first(store.objects(object, RDF.first)):  # may not have type
            # RDF.List

            self.__serialized[object] = 1

            # Warn that any assertions on object other than
            # RDF.first and RDF.rest are ignored... including RDF.List
            import warnings

            warnings.warn(
                "Assertions on %s other than RDF.first " % repr(object)
                + "and RDF.rest are ignored ... including RDF.List",
                UserWarning,
                stacklevel=2,
            )
            writer.attribute(RDFVOC.parseType, "Collection")

            col = Collection(store, object)

            for item in col:
                if isinstance(item, URIRef):
                    self.forceRDFAbout.add(item)
                # type error: Argument 1 to "subject" of "PrettyXMLSerializer" has incompatible type "Node"; expected "Identifier"
                self.subject(item)  # type: ignore[arg-type]

                if not isinstance(item, URIRef):
                    # type error: Invalid index type "Node" for "Dict[Identifier, int]"; expected type "Identifier"
                    self.__serialized[item] = 1  # type: ignore[index]

        elif first(
            store.triples_choices(
                # type error: Argument 1 to "triples_choices" of "Graph" has incompatible type "Tuple[Identifier, URIRef, List[URIRef]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]"
                (object, RDF.type, [OWL_NS.Class, RDFS.Class])  # type: ignore[arg-type]
            )
        ) and isinstance(object, URIRef):
            writer.attribute(RDFVOC.resource, self.relativize(object))

        elif depth <= self.max_depth:
            self.subject(object, depth + 1)

        elif isinstance(object, BNode):
            if (
                object not in self.__serialized
                and (object, None, None) in store
                and len(list(store.subjects(object=object))) == 1
            ):
                # inline blank nodes if they haven't been serialized yet
                # and are only referenced once (regardless of depth)
                self.subject(object, depth + 1)
            else:
                writer.attribute(RDFVOC.nodeID, fix(object))

        else:
            writer.attribute(RDFVOC.resource, self.relativize(object))

        writer.pop(predicate)
|
||||
@@ -0,0 +1,121 @@
|
||||
"""
|
||||
Trig RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TR/trig/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.plugins.serializers.turtle import TurtleSerializer
|
||||
from rdflib.term import BNode, Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _ContextType, _SubjectType
|
||||
|
||||
__all__ = ["TrigSerializer"]
|
||||
|
||||
|
||||
class TrigSerializer(TurtleSerializer):
    """TriG serializer: writes each non-empty context of the store as a
    ``<label> { ... }`` block (``{ ... }`` for the default context) using the
    inherited Turtle machinery for the statements inside."""

    short_name = "trig"
    indentString = 4 * " "

    def __init__(self, store: Union[Graph, ConjunctiveGraph]):
        """Collect the contexts to serialize.

        For a context-aware store these are all of its contexts, plus the
        default context when that is truthy; otherwise the store itself is
        the only context and there is no default context.
        """
        self.default_context: Optional[Node]
        if not store.context_aware:
            self.contexts = [store]
            self.default_context = None
        else:
            if TYPE_CHECKING:
                assert isinstance(store, ConjunctiveGraph)
            self.contexts = list(store.contexts())
            self.default_context = store.default_context.identifier
            if store.default_context:
                self.contexts.append(store.default_context)

        super().__init__(store)

    def preprocess(self) -> None:
        """Preprocess every non-empty context and record, per context, its
        ordered subjects and subject map in ``self._contexts``."""
        for context in self.contexts:
            # do not write unnecessary prefix (ex: for an empty default graph)
            if len(context) == 0:
                continue
            self.store = context
            # Don't generate a new prefix for a graph URI if one already exists
            self.getQName(context.identifier, False)
            self._subjects = {}

            for triple in context:
                self.preprocessTriple(triple)

            for subject in self._subjects:
                self._references[subject] += 1

            self._contexts[context] = (self.orderSubjects(), self._subjects)

    def reset(self) -> None:
        """Reset the inherited state and clear the per-context bookkeeping."""
        super().reset()
        self._contexts: Dict[
            _ContextType,
            Tuple[List[_SubjectType], Dict[_SubjectType, bool]],
        ] = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write all preprocessed contexts to ``stream``.

        :param stream: binary stream that receives the document.
        :param base: base IRI; when ``None`` the store's base (if any) is
            used.
        :param encoding: not consulted by this method.
        :param spacious: when not ``None``, stored as ``self._spacious``.
        :param kwargs: accepted for interface compatibility, unused here.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()

        self.startDocument()

        for graph, (ordered_subjects, subjects) in self._contexts.items():
            if not ordered_subjects:
                continue

            self._serialized = {}
            self.store = graph
            self._subjects = subjects

            if self.default_context and graph.identifier == self.default_context:
                opener = "\n{"
            else:
                label: Optional[str] = None
                if not isinstance(graph.identifier, BNode):
                    # Show the full graph URI if a prefix for it doesn't already exist
                    label = self.getQName(graph.identifier, False)
                if label is None:
                    label = graph.identifier.n3()
                opener = "\n%s {" % label
            self.write(self.indent() + opener)

            self.depth += 1
            for subject in ordered_subjects:
                if self.isDone(subject):
                    continue
                # A newline follows every statement that was written.
                if self.statement(subject):
                    self.write("\n")
            self.depth -= 1
            self.write("}\n")

        self.endDocument()
        stream.write(b"\n")
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, Optional
|
||||
|
||||
from rdflib.graph import ConjunctiveGraph, Graph
|
||||
from rdflib.namespace import Namespace
|
||||
from rdflib.plugins.serializers.xmlwriter import XMLWriter
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, URIRef
|
||||
|
||||
__all__ = ["TriXSerializer"]
|
||||
|
||||
# TODO: Move this somewhere central
|
||||
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
|
||||
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
|
||||
|
||||
|
||||
class TriXSerializer(Serializer):
    """Serializes a context-aware store as TriX: one ``graph`` element per
    context, each holding one ``triple`` element per triple."""

    def __init__(self, store: Graph):
        """
        :param store: a context-aware graph.
        :raises Exception: if ``store`` is not context-aware.
        """
        super().__init__(store)
        if store.context_aware:
            return
        raise Exception("TriX serialization only makes sense for context-aware stores")

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Write the store to ``stream`` as a ``TriX`` document.

        :param stream: binary stream that receives the document.
        :param base: base IRI; when ``None`` the store's base (if any) is
            used.
        :param encoding: passed to the ``XMLWriter``.
        :param kwargs: accepted for interface compatibility, unused.
        :raises Exception: if the store is neither a ``ConjunctiveGraph`` nor
            a ``Graph``.
        """
        store = self.store
        writer = XMLWriter(
            stream, store.namespace_manager, encoding, extra_ns={"": TRIXNS}
        )
        self.writer = writer

        writer.push(TRIXNS["TriX"])
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is None and store.base is not None:
            base = store.base
        if base is not None:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", base)
        writer.namespaces()

        if isinstance(store, ConjunctiveGraph):
            for subgraph in store.contexts():
                self._writeGraph(subgraph)
        elif isinstance(store, Graph):
            self._writeGraph(store)
        else:
            raise Exception(f"Unknown graph type: {type(self.store)}")

        writer.pop()
        stream.write(b"\n")

    def _writeGraph(self, graph):  # noqa: N802
        """Write one ``graph`` element: optional base attribute, a ``uri``
        child when the graph identifier is a ``URIRef``, then its triples."""
        writer = self.writer
        writer.push(TRIXNS["graph"])
        if graph.base:
            writer.attribute("http://www.w3.org/XML/1998/namespacebase", graph.base)
        if isinstance(graph.identifier, URIRef):
            writer.element(TRIXNS["uri"], content=str(graph.identifier))

        for triple in graph.triples((None, None, None)):
            self._writeTriple(triple)
        writer.pop()

    def _writeTriple(self, triple):  # noqa: N802
        """Write one ``triple`` element with a child per component.

        ``URIRef`` -> ``uri``, ``BNode`` -> ``id``, ``Literal`` ->
        ``typedLiteral`` (when it has a datatype) or ``plainLiteral`` (with
        ``xml:lang`` when it has a language). Components of any other type
        are not written.
        """
        writer = self.writer
        writer.push(TRIXNS["triple"])
        for component in triple:
            text = str(component)
            if isinstance(component, URIRef):
                writer.element(TRIXNS["uri"], content=text)
                continue
            if isinstance(component, BNode):
                writer.element(TRIXNS["id"], content=text)
                continue
            if not isinstance(component, Literal):
                continue

            if component.datatype:
                writer.element(
                    TRIXNS["typedLiteral"],
                    content=text,
                    attributes={TRIXNS["datatype"]: str(component.datatype)},
                )
            elif component.language:
                writer.element(
                    TRIXNS["plainLiteral"],
                    content=text,
                    attributes={XMLNS["lang"]: str(component.language)},
                )
            else:
                writer.element(TRIXNS["plainLiteral"], content=text)
        writer.pop()
|
||||
@@ -0,0 +1,453 @@
|
||||
"""
|
||||
Turtle RDF graph serializer for RDFLib.
|
||||
See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
DefaultDict,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
from rdflib.exceptions import Error
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, RDFS
|
||||
from rdflib.serializer import Serializer
|
||||
from rdflib.term import BNode, Literal, Node, URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.graph import _PredicateType, _SubjectType, _TripleType
|
||||
|
||||
__all__ = ["RecursiveSerializer", "TurtleSerializer"]
|
||||
|
||||
|
||||
class RecursiveSerializer(Serializer):
    """Shared bookkeeping for serializers that emit a graph subject by subject.

    The class tracks which subjects exist, how often each node occurs as an
    object, which subjects have already been written, the current nesting
    depth and the prefix-to-namespace table built up during serialization.
    """

    # Instances of these classes are ordered first by orderSubjects().
    topClasses = [RDFS.Class]
    # Predicates that sortProperties() places ahead of all others.
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    indentString = " "
    roundtrip_prefixes: Tuple[Any, ...] = ()

    def __init__(self, store: Graph):
        super().__init__(store)
        self.stream: Optional[IO[bytes]] = None
        self.reset()

    def addNamespace(self, prefix: str, uri: URIRef) -> None:
        """Record *prefix* as bound to *uri*.

        Raises:
            Exception: if *prefix* is already bound to a different namespace.
        """
        if prefix in self.namespaces:
            existing = self.namespaces[prefix]
            if existing != uri:
                raise Exception(
                    "Trying to override namespace prefix %s => %s, but it's already bound to %s"
                    % (prefix, uri, existing)
                )
        self.namespaces[prefix] = uri

    def checkSubject(self, subject: _SubjectType) -> bool:
        """Check to see if the subject should be serialized yet"""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are not nested below depth 1.
        if subject in self._topLevels and self.depth > 1:
            return False
        # URI subjects are not nested once the depth limit is reached.
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject: _SubjectType) -> bool:
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self) -> List[_SubjectType]:
        """Return the subjects in the order they should be written.

        Members of each class in ``topClasses`` come first (sorted, and
        recorded in ``_topLevels``); the remaining subjects follow, sorted by
        (is blank node, reference count, subject).
        """
        placed = set()
        ordered: List[_SubjectType] = []

        for class_uri in self.topClasses:
            members = sorted(self.store.subjects(RDF.type, class_uri))
            ordered.extend(members)
            for member in members:
                self._topLevels[member] = True
                placed.add(member)

        remaining = sorted(
            (isinstance(subject, BNode), self._references[subject], subject)
            for subject in self._subjects
            if subject not in placed
        )
        ordered.extend(entry[2] for entry in remaining)

        return ordered

    def preprocess(self) -> None:
        """Run preprocessTriple() over every triple in the store."""
        for spo in self.store.triples((None, None, None)):
            self.preprocessTriple(spo)

    def preprocessTriple(self, spo: _TripleType) -> None:
        """Count the object as referenced and register the subject."""
        subject, _predicate, obj = spo
        self._references[obj] += 1
        self._subjects[subject] = True

    def reset(self) -> None:
        """Clear all per-run state and re-seed the round-trip prefixes."""
        self.depth = 0
        # Typed none because nothing is using it ...
        self.lists: Dict[None, None] = {}
        self.namespaces: Dict[str, URIRef] = {}
        self._references: DefaultDict[Node, int] = defaultdict(int)
        self._serialized: Dict[_SubjectType, bool] = {}
        self._subjects: Dict[_SubjectType, bool] = {}
        self._topLevels: Dict[_SubjectType, bool] = {}

        wanted = self.roundtrip_prefixes
        if not wanted:
            return
        # An iterable selects individual prefixes; any other truthy value
        # copies every binding of the store.
        select_listed = hasattr(wanted, "__iter__")
        for prefix, ns in self.store.namespaces():
            if select_listed and prefix not in wanted:
                continue
            self.addNamespace(prefix, ns)

    def buildPredicateHash(
        self, subject: _SubjectType
    ) -> Mapping[_PredicateType, List[Node]]:
        """
        Build a hash key by predicate to a list of objects for the given
        subject
        """
        by_predicate: Dict[_PredicateType, List[Node]] = {}
        for _s, predicate, obj in self.store.triples((subject, None, None)):
            by_predicate.setdefault(predicate, []).append(obj)
        return by_predicate

    def sortProperties(
        self, properties: Mapping[_PredicateType, List[Node]]
    ) -> List[_PredicateType]:
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values. Return a sorted list of properties."""
        # The object lists are sorted in place.
        for values in properties.values():
            values.sort()

        # Preferred predicates first, then everything else in sorted order.
        ordered: List[_PredicateType] = []
        placed = set()
        for prop in self.predicateOrder:
            if prop in properties and prop not in placed:
                ordered.append(prop)
                placed.add(prop)
        for prop in sorted(properties.keys()):
            if prop not in placed:
                ordered.append(prop)
                placed.add(prop)
        return ordered

    def subjectDone(self, subject: _SubjectType) -> None:
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier: int = 0) -> str:
        """Returns indent string multiplied by the depth"""
        levels = self.depth + modifier
        return levels * self.indentString

    def write(self, text: str) -> None:
        """Write text in given encoding."""
        encoded = text.encode(self.encoding, "replace")
        # type error: Item "None" of "Optional[IO[bytes]]" has no attribute "write"
        self.stream.write(encoded)  # type: ignore[union-attr]
|
||||
|
||||
|
||||
# Position of a term within a statement; selects the rendering rules.
SUBJECT = 0
VERB = 1
OBJECT = 2

# Passed as ``gen_prefix`` when a prefixed name is computed for a datatype.
_GEN_QNAME_FOR_DT = False
# Initial value of the serializer's ``_spacious`` layout flag.
_SPACIOUS_OUTPUT = False


class TurtleSerializer(RecursiveSerializer):
    """Serialize the triples of a graph as Turtle text.

    Prefix bindings are collected while the triples are preprocessed, then
    each subject is written together with its predicates and objects. A blank
    node that is referenced at most once and has not been written yet is
    emitted inline: as ``( ... )`` when it heads a well-formed RDF list,
    otherwise as ``[ ... ]``.
    """

    short_name = "turtle"
    indentString = " "

    def __init__(self, store: Graph):
        # Must exist before the base initializer runs: that initializer calls
        # reset(), which may call addNamespace(), which reads _ns_rewrite.
        self._ns_rewrite: Dict[str, str] = {}
        super().__init__(store)
        self.keywords: Dict[Node, str] = {RDF.type: "a"}
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    # type error: Return type "str" of "addNamespace" incompatible with return type "None" in supertype "RecursiveSerializer"
    def addNamespace(self, prefix: str, namespace: URIRef) -> str:  # type: ignore[override]
        """Register *prefix* for *namespace* and return the prefix actually used.

        Turtle does not support prefixes that start with ``_``, and a prefix
        may already be bound to a different namespace. In both cases the
        prefix is rewritten by prepending ``p`` until it is unused. Rewrites
        are remembered in ``_ns_rewrite`` because namespaces are registered as
        triples are seen: ``_9`` may be rewritten to ``p_9`` before a "real"
        ``p_9`` prefix shows up later.
        """
        needs_rewrite = (
            prefix.startswith("_")
            or self.namespaces.get(prefix, namespace) != namespace
        )
        if needs_rewrite and prefix not in self._ns_rewrite:
            candidate = "p" + prefix
            while candidate in self.namespaces:
                candidate = "p" + candidate
            self._ns_rewrite[prefix] = candidate

        prefix = self._ns_rewrite.get(prefix, prefix)

        super().addNamespace(prefix, namespace)
        return prefix

    def reset(self) -> None:
        """Clear the per-run state, including remembered prefix rewrites."""
        super().reset()
        # typing as Dict[None, None] because nothing seems to be using it
        self._shortNames: Dict[None, None] = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        spacious: Optional[bool] = None,
        **kwargs: Any,
    ) -> None:
        """Write the bound store to *stream* as Turtle.

        Args:
            stream: binary stream that receives the document.
            base: base IRI for the ``@base`` directive; when ``None`` the
                store's own ``base`` is used if it has one.
            encoding: accepted for interface compatibility; this method does
                not consult it (``write()`` encodes with ``self.encoding``).
            spacious: when not ``None``, replaces the ``_spacious`` flag.
            **kwargs: accepted and ignored.
        """
        self.reset()
        self.stream = stream
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is not None:
            self.base = base
        elif self.store.base is not None:
            self.base = self.store.base

        if spacious is not None:
            self._spacious = spacious

        try:
            self.preprocess()
            subjects_list = self.orderSubjects()

            self.startDocument()

            for subject in subjects_list:
                if self.isDone(subject):
                    continue
                # A newline follows every statement that was written, the
                # first one included.
                if self.statement(subject):
                    self.write("\n")

            self.endDocument()
            stream.write("\n".encode("latin-1"))
        finally:
            # Reset even when serialization fails so that a stale base cannot
            # leak into the next call on this instance.
            self.base = None

    def preprocessTriple(self, triple: _TripleType) -> None:
        """Update reference counts and register the prefixes *triple* needs."""
        super().preprocessTriple(triple)
        for position, node in enumerate(triple):
            if position == VERB and node in self.keywords:
                # predicate is a keyword
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(position == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        predicate = triple[1]
        if isinstance(predicate, BNode):  # hmm - when is P ever a bnode?
            self._references[predicate] += 1

    # TODO: Rename to get_pname
    def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]:
        """Return ``prefix:local`` for *uri*, registering the prefix on success.

        Returns ``None`` when *uri* is not a ``URIRef``, when no prefix can be
        determined for it, or when the local part ends with ``.``.
        """
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)
            if pfx is None:
                # nothing worked
                return None
            parts = (pfx, uri, "")

        prefix, namespace, local = parts

        local = local.replace(r"(", r"\(").replace(r")", r"\)")

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return "%s:%s" % (prefix, local)

    def startDocument(self) -> None:
        """Write the ``@base`` directive (if any) and the sorted prefixes."""
        self._started = True
        ns_list = sorted(self.namespaces.items())

        if self.base:
            self.write(self.indent() + "@base <%s> .\n" % self.base)
        for prefix, uri in ns_list:
            self.write(self.indent() + "@prefix %s: <%s> .\n" % (prefix, uri))
        if ns_list and self._spacious:
            self.write("\n")

    def endDocument(self) -> None:
        """Write a trailing newline when spacious output is enabled."""
        if self._spacious:
            self.write("\n")

    def statement(self, subject: _SubjectType) -> bool:
        """Mark *subject* as done and write its statement.

        The ``[]`` form is tried first, then the default form; the result is
        true if either reported success.
        """
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject: _SubjectType) -> bool:
        """Write *subject* followed by its predicate list and ``.``."""
        self.write("\n" + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(" .")
        return True

    def s_squared(self, subject: _SubjectType) -> bool:
        """Write an unreferenced blank-node subject as ``[] ... .``.

        Returns ``False`` without writing when *subject* is referenced
        somewhere or is not a blank node.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write("\n" + self.indent() + "[]")
        self.predicateList(subject)
        self.write(" .")
        return True

    def path(self, node: Node, position: int, newline: bool = False) -> None:
        """Write *node*, inline when possible and as a plain label otherwise.

        Raises:
            Error: if neither form reports success.
        """
        if not (
            self.p_squared(node, position, newline)
            or self.p_default(node, position, newline)
        ):
            raise Error("Cannot serialize node '%s'" % (node,))

    def p_default(self, node: Node, position: int, newline: bool = False) -> bool:
        """Write the label of *node*, space-separated unless starting a line."""
        if position != SUBJECT and not newline:
            self.write(" ")
        self.write(self.label(node, position))
        return True

    def label(self, node: Node, position: int) -> str:
        """Return the Turtle text for *node* at *position*."""
        if node == RDF.nil:
            return "()"
        # Compare by value: ``is`` on an int only works by accident of
        # interpreter caching.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT),
            )
        else:
            node = self.relativize(node)  # type: ignore[type-var]

            return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node: Node, position: int, newline: bool = False) -> bool:
        """Write a blank node inline as ``( ... )`` or ``[ ... ]``.

        Returns ``False`` without writing when *node* is not a blank node, has
        already been serialized, is referenced more than once, or is in
        subject position.
        """
        if (
            not isinstance(node, BNode)
            or node in self._serialized
            or self._references[node] > 1
            or position == SUBJECT
        ):
            return False

        if not newline:
            self.write(" ")

        if self.isValidList(node):
            # this is a list
            self.write("(")
            self.depth += 1  # 2
            self.doList(node)
            self.depth -= 1  # 2
            self.write(" )")
        else:
            self.subjectDone(node)
            # Depth is raised by two for the opening bracket and lowered by
            # one before the nested predicates are written.
            self.depth += 2
            self.write("[")
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(" ]")
            self.depth -= 1

        return True

    def isValidList(self, l_: Node) -> bool:
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if self.store.value(l_, RDF.first) is None:
                return False
        except Exception:
            return False
        while l_:
            # Every cell except rdf:nil must carry exactly two properties.
            if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2:
                return False
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]
        return True

    def doList(self, l_: Node) -> None:
        """Write the items of the RDF list starting at *l_*.

        Each visited cell is marked as done.
        """
        while l_:
            item = self.store.value(l_, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l_)
            # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node")
            l_ = self.store.value(l_, RDF.rest)  # type: ignore[assignment]

    def predicateList(self, subject: Node, newline: bool = False) -> None:
        """Write every predicate of *subject* with its objects, ``;``-separated."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if not propList:
            return
        first, rest = propList[0], propList[1:]
        self.verb(first, newline=newline)
        self.objectList(properties[first])
        for predicate in rest:
            self.write(" ;\n" + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node: Node, newline: bool = False) -> None:
        """Write *node* in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects: Sequence[Node]) -> None:
        """Write *objects* separated by ``,``, one per line after the first."""
        if not objects:
            return
        # NOTE(review): this used to read ``(count == 1) and 0 or 1``, which
        # evaluates to 1 for every count because 0 is falsy. The effective
        # value is kept so the emitted layout does not change; confirm whether
        # a single object was meant to get no extra depth.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(",\n" + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
|
||||
+128
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
from typing import IO, TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
|
||||
from rdflib.term import URIRef
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.namespace import Namespace, NamespaceManager
|
||||
|
||||
|
||||
__all__ = ["XMLWriter"]
|
||||
|
||||
# A carriage return in character data has to be written as a character
# reference: XML parsers normalise a literal CR to LF, so writing anything
# else (or the raw character) loses it on a round trip.
ESCAPE_ENTITIES = {"\r": "&#13;"}


class XMLWriter:
    """Minimal streaming XML writer with namespace-aware element names.

    Elements are opened with :meth:`push` and closed with :meth:`pop`. A start
    tag stays open, so attributes and namespace declarations can still be
    added, until text or a child element is written. Names are given as full
    URIs and shortened by :meth:`qname`.
    """

    def __init__(
        self,
        stream: IO[bytes],
        namespace_manager: NamespaceManager,
        encoding: Optional[str] = None,
        decl: int = 1,
        extra_ns: Optional[Dict[str, Namespace]] = None,
    ):
        """Wrap *stream* and optionally write the XML declaration.

        Args:
            stream: binary stream that receives the encoded output.
            namespace_manager: supplies namespace bindings and qnames for
                URIs not covered by *extra_ns*.
            encoding: codec name; ``"utf-8"`` when falsy.
            decl: when truthy, the XML declaration is written immediately.
            extra_ns: additional prefix-to-namespace bindings that take
                precedence over those of *namespace_manager*.
        """
        encoding = encoding or "utf-8"
        # NOTE on type ignores: this is mainly because the variable is being re-used.
        # type error: Incompatible types in assignment (expression has type "StreamWriter", variable has type "IO[bytes]")
        self.stream = stream = codecs.getwriter(encoding)(stream)  # type: ignore[assignment]
        if decl:
            # type error: No overload variant of "write" of "IO" matches argument type "str"
            stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)  # type: ignore[call-overload]
        self.element_stack: List[str] = []
        self.nm = namespace_manager
        self.extra_ns = extra_ns or {}
        # True while no start tag is waiting for its closing ">".
        self.closed = True
        # True once an element has been closed since the last push().
        self.parent = False

    def __get_indent(self) -> str:
        return " " * len(self.element_stack)

    indent = property(__get_indent)

    def __close_start_tag(self) -> None:
        if not self.closed:  # TODO:
            self.closed = True
            self.stream.write(">")

    def push(self, uri: str) -> None:
        """Open a new element named by *uri* on its own indented line."""
        self.__close_start_tag()
        write = self.stream.write
        write("\n")
        write(self.indent)
        write("<%s" % self.qname(uri))
        self.element_stack.append(uri)
        self.closed = False
        self.parent = False

    def pop(self, uri: Optional[str] = None) -> None:
        """Close the most recently opened element.

        An element whose start tag is still open is written as an empty
        element. Otherwise an end tag is written, on a new indented line if a
        child element was closed since this element's last ``push``.

        Args:
            uri: when given, asserted to equal the element being closed.
        """
        top = self.element_stack.pop()
        if uri:
            assert uri == top
        write = self.stream.write
        if not self.closed:
            self.closed = True
            write("/>")
        else:
            if self.parent:
                write("\n")
                write(self.indent)
            write("</%s>" % self.qname(top))
        self.parent = True

    def element(
        self,
        uri: str,
        content: str,
        attributes: Optional[Dict[URIRef, str]] = None,
    ) -> None:
        """Utility method for adding a complete simple element"""
        self.push(uri)
        # ``None`` stands for "no attributes"; a shared mutable default is
        # avoided on purpose.
        for name, value in (attributes or {}).items():
            self.attribute(name, value)
        self.text(content)
        self.pop()

    def namespaces(
        self, namespaces: Optional[Iterable[Tuple[str, str]]] = None
    ) -> None:
        """Write ``xmlns`` declarations into the currently open start tag.

        Args:
            namespaces: ``(prefix, namespace)`` pairs; when falsy the bindings
                of the namespace manager are used. The ``extra_ns`` bindings
                are always written afterwards.
        """
        if not namespaces:
            namespaces = self.nm.namespaces()

        write = self.stream.write
        write("\n")
        # Values go through quoteattr so that characters such as "&" in a
        # namespace URI cannot produce malformed XML.
        for prefix, namespace in namespaces:
            if prefix:
                write(" xmlns:%s=%s\n" % (prefix, quoteattr(namespace)))
            # Allow user-provided namespace bindings to prevail
            elif prefix not in self.extra_ns:
                write(" xmlns=%s\n" % quoteattr(namespace))

        for prefix, namespace in self.extra_ns.items():
            if prefix:
                write(" xmlns:%s=%s\n" % (prefix, quoteattr(namespace)))
            else:
                write(" xmlns=%s\n" % quoteattr(namespace))

    def attribute(self, uri: str, value: str) -> None:
        """Write one attribute, named by *uri*, into the open start tag."""
        write = self.stream.write
        write(" %s=%s" % (self.qname(uri), quoteattr(value)))

    def text(self, text: str) -> None:
        """Write character data, closing the pending start tag first.

        Text containing both ``<`` and ``>`` but no ``]]>`` is written
        verbatim inside a CDATA section; all other text is entity-escaped.
        """
        self.__close_start_tag()
        if "<" in text and ">" in text and "]]>" not in text:
            self.stream.write("<![CDATA[")
            self.stream.write(text)
            self.stream.write("]]>")
        else:
            self.stream.write(escape(text, ESCAPE_ENTITIES))

    def qname(self, uri: str) -> str:
        """Compute qname for a uri using our extra namespaces,
        or the given namespace manager"""

        for pre, ns in self.extra_ns.items():
            if uri.startswith(ns):
                local = uri[len(ns) :]
                if pre != "":
                    return ":".join([pre, local])
                else:
                    return local

        return self.nm.qname_strict(uri)
|
||||
Reference in New Issue
Block a user