2025-12-01
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
SPARQL implementation for RDFLib
|
||||
|
||||
.. versionadded:: 4.0
|
||||
"""
|
||||
|
||||
from importlib.metadata import entry_points
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# Module-level flag, enabled by default; its effect is described in the
# string that follows it.
SPARQL_LOAD_GRAPHS = True
"""
If True, using FROM <uri> and FROM NAMED <uri>
will load/parse more data
"""


# Module-level flag, enabled by default; its effect is described in the
# string that follows it.
SPARQL_DEFAULT_GRAPH_UNION = True
"""
If True - the default graph in the RDF Dataset is the union of all
named graphs (like RDFLib's ConjunctiveGraph)
"""


# Registry of custom evaluation callables keyed by name.  Further down in this
# module it is filled from the entry points registered under
# PLUGIN_ENTRY_POINT, using each entry point's name as the key.
CUSTOM_EVALS = {}
"""
Custom evaluation functions

These must be functions taking (ctx, part) and raise
NotImplementedError if they cannot handle a certain part
"""


# Entry-point group that is scanned for custom evaluation functions.
PLUGIN_ENTRY_POINT = "rdf.plugins.sparqleval"
|
||||
|
||||
|
||||
from . import operators, parser, parserutils
|
||||
from .processor import prepareQuery, prepareUpdate, processUpdate
|
||||
|
||||
# Reference the imported submodules; presumably this keeps linters from
# reporting the imports above as unused.
assert parser
assert operators
assert parserutils


# Discover custom evaluation functions published through entry points and
# register each one in CUSTOM_EVALS under its entry-point name.
all_entry_points = entry_points()
if not hasattr(all_entry_points, "select"):
    # Prior to Python 3.10, this returns a dict instead of the selection interface
    if TYPE_CHECKING:
        assert isinstance(all_entry_points, dict)
    _plugin_entry_points = all_entry_points.get(PLUGIN_ENTRY_POINT, [])
else:
    _plugin_entry_points = all_entry_points.select(group=PLUGIN_ENTRY_POINT)

for ep in _plugin_entry_points:
    CUSTOM_EVALS[ep.name] = ep.load()
|
||||
|
||||
# Names exported by a star-import of this package: the query/update helpers
# imported from .processor, the three submodules imported above, and the
# custom-evaluation registry.
__all__ = [
    "prepareQuery",
    "prepareUpdate",
    "processUpdate",
    "operators",
    "parser",
    "parserutils",
    "CUSTOM_EVALS",
]
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Aggregation functions
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
from rdflib.plugins.sparql.datatypes import type_promotion
|
||||
from rdflib.plugins.sparql.evalutils import _eval, _val
|
||||
from rdflib.plugins.sparql.operators import numeric
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings, NotBoundError, SPARQLTypeError
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class Accumulator:
    """Shared state and row-selection logic for the aggregate accumulators.

    Concrete subclasses supply ``update`` (consume one row) and ``get_value``
    (produce the final literal); both are only declared here.
    """

    def __init__(self, aggregation: CompValue):
        # Declarations only: subclasses provide the implementations.
        self.get_value: Callable[[], Optional[Literal]]
        self.update: Callable[[FrozenBindings, Aggregator], None]
        # Variable that receives the result, and the expression to aggregate.
        self.var = aggregation.res
        self.expr = aggregation.vars
        if aggregation.distinct:
            self.distinct = aggregation.distinct
            # values already consumed, consulted by use_row()
            self.seen: Set[Any] = set()
        else:
            # Without DISTINCT every row is accepted, so the per-instance
            # ``use_row`` is replaced by the always-true check.
            # type error: Cannot assign to a method
            self.use_row = self.dont_care  # type: ignore[method-assign]
            self.distinct = False

    def dont_care(self, row: FrozenBindings) -> bool:
        """Accept every row (no DISTINCT test)."""
        return True

    def use_row(self, row: FrozenBindings) -> bool:
        """Return True when the row's expression value has not been seen yet."""
        evaluated = _eval(self.expr, row)
        return evaluated not in self.seen

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        """Store the final aggregate value in ``bindings`` under ``self.var``."""
        final = self.get_value()
        # type error: Incompatible types in assignment (expression has type "Optional[Literal]", target has type "Identifier")
        bindings[self.var] = final  # type: ignore[assignment]
|
||||
|
||||
|
||||
class Counter(Accumulator):
    """Accumulator for COUNT: counts the rows whose expression is bound."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.value = 0
        if self.expr == "*":
            # cannot eval "*" => always use the full row
            # type error: Cannot assign to a method
            self.eval_row = self.eval_full_row  # type: ignore[assignment]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Count ``row`` unless its expression is unbound."""
        try:
            counted = self.eval_row(row)
        except NotBoundError:
            # skip UNDEF
            return
        else:
            self.value += 1
            if self.distinct:
                self.seen.add(counted)

    def get_value(self) -> Literal:
        """Return the count as a literal."""
        return Literal(self.value)

    def eval_row(self, row: FrozenBindings) -> Identifier:
        """Evaluate the counted expression against ``row``."""
        return _eval(self.expr, row)

    def eval_full_row(self, row: FrozenBindings) -> FrozenBindings:
        """Return the row itself; used in place of ``eval_row`` for ``*``."""
        return row

    def use_row(self, row: FrozenBindings) -> bool:
        """Return True when the row's value is bound and not yet seen."""
        try:
            candidate = self.eval_row(row)
        except NotBoundError:
            # happens when counting zero optional nodes. See issue #2229
            return False
        return candidate not in self.seen
|
||||
|
||||
|
||||
@overload
def type_safe_numbers(*args: int) -> Tuple[int, ...]: ...


@overload
def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[float, int], ...]: ...


def type_safe_numbers(
    *args: Union[Decimal, float, int]
) -> Tuple[Union[float, int], ...]:
    """Return ``args`` in a form that can be added together safely.

    Python refuses to add a ``float`` to a ``Decimal``.  When both kinds are
    present, every argument is converted to ``float``; otherwise the
    arguments are returned unchanged.

    :param args: the numbers that are about to be combined.
    :return: a tuple with one entry per argument, in the same order.  A
        tuple is returned on both paths so the result can be iterated more
        than once.
    """
    has_float = any(isinstance(arg, float) for arg in args)
    has_decimal = any(isinstance(arg, Decimal) for arg in args)
    if has_float and has_decimal:
        return tuple(float(arg) for arg in args)
    # type error: Incompatible return value type (got "Tuple[Union[Decimal, float, int], ...]", expected "Tuple[Union[float, int], ...]")
    # NOTE on type error: Decimals do reach this point, but only when no
    # float is present, so the values can still be added to each other.
    return args  # type: ignore[return-value]
|
||||
|
||||
|
||||
class Sum(Accumulator):
    """Accumulator for SUM: keeps a running total and a promoted datatype."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.value = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Add the row's value to the total; unbound values are skipped."""
        try:
            value = _eval(self.expr, row)
            if self.datatype is None:
                # first value: adopt its datatype as-is
                promoted = value.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(self.datatype, value.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            self.value = sum(type_safe_numbers(self.value, numeric(value)))
            if self.distinct:
                self.seen.add(value)
        except NotBoundError:
            # skip UNDEF
            return

    def get_value(self) -> Literal:
        """Return the total as a literal carrying the promoted datatype."""
        return Literal(self.value, datatype=self.datatype)
|
||||
|
||||
|
||||
class Average(Accumulator):
    """Accumulator for AVG: tracks a sum, a row count and a promoted datatype."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        self.counter = 0
        self.sum = 0
        self.datatype: Optional[str] = None

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Fold the row's value into the sum and count.

        Rows raising ``NotBoundError`` or ``SPARQLTypeError`` are skipped.
        """
        try:
            value = _eval(self.expr, row)
            previous_dt = self.datatype
            # numeric() runs first, so a type error leaves the datatype untouched
            self.sum = sum(type_safe_numbers(self.sum, numeric(value)))
            if previous_dt is None:
                promoted = value.datatype
            else:
                # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef"
                promoted = type_promotion(previous_dt, value.datatype)  # type: ignore[arg-type]
            self.datatype = promoted
            if self.distinct:
                self.seen.add(value)
            self.counter += 1
        except (NotBoundError, SPARQLTypeError):
            # skip UNDEF or BNode => SPARQLTypeError
            pass

    def get_value(self) -> Literal:
        """Return the mean: 0 for no rows, float division for float/double,
        Decimal division otherwise."""
        if self.counter == 0:
            return Literal(0)
        if self.datatype not in (XSD.float, XSD.double):
            return Literal(Decimal(self.sum) / Decimal(self.counter))
        return Literal(self.sum / self.counter)
|
||||
|
||||
|
||||
class Extremum(Accumulator):
    """Shared implementation for Minimum and Maximum.

    Subclasses provide ``compare``, which picks one of two values.
    """

    def __init__(self, aggregation: CompValue):
        # declaration only: implemented by Minimum/Maximum
        self.compare: Callable[[Any, Any], Any]
        super().__init__(aggregation)
        self.value: Any = None
        # DISTINCT would not change the value for MIN or MAX
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None:
        """Bind the extremum, leaving ``bindings`` untouched if none was found."""
        if self.value is None:
            return
        bindings[self.var] = Literal(self.value)

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Keep whichever of the current value and the row's value wins."""
        try:
            candidate = _eval(self.expr, row)
            if self.value is None:
                self.value = candidate
            else:
                # self.compare is implemented by Minimum/Maximum
                self.value = self.compare(self.value, candidate)
        except (NotBoundError, SPARQLTypeError):
            # skip UNDEF or BNode => SPARQLTypeError
            pass
|
||||
|
||||
|
||||
# Constrained type variable used by Minimum.compare / Maximum.compare so the
# return type matches the type of the two compared terms.
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
class Minimum(Extremum):
    """Accumulator for MIN."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        """Return the smaller of the two terms, ordered by ``_val``."""
        return min((val1, val2), key=_val)
|
||||
|
||||
|
||||
class Maximum(Extremum):
    """Accumulator for MAX."""

    def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT:
        """Return the larger of the two terms, ordered by ``_val``."""
        return max((val1, val2), key=_val)
|
||||
|
||||
|
||||
class Sample(Accumulator):
    """Accumulator for SAMPLE: binds the first value that evaluates."""

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        # DISTINCT would not change the value
        # type error: Cannot assign to a method
        self.use_row = self.dont_care  # type: ignore[method-assign]

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Bind the row's value on the aggregator, then unregister itself."""
        try:
            sampled = _eval(self.expr, row)
            # set the value now
            aggregator.bindings[self.var] = sampled
            # and skip this accumulator for future rows
            del aggregator.accumulators[self.var]
        except NotBoundError:
            return

    def get_value(self) -> None:
        """Return None; only reached when no row supplied a value."""
        return None
|
||||
|
||||
|
||||
class GroupConcat(Accumulator):
    """Accumulator for GROUP_CONCAT: collects values and joins them."""

    value: List[Literal]

    def __init__(self, aggregation: CompValue):
        super().__init__(aggregation)
        # only GROUPCONCAT needs to have a list as accumulator
        self.value = []
        # a single space is used when no separator was given
        self.separator = (
            " " if aggregation.separator is None else aggregation.separator
        )

    def update(self, row: FrozenBindings, aggregator: Aggregator) -> None:
        """Append the row's value unless it is undefined."""
        try:
            item = _eval(self.expr, row)
            # skip UNDEF
            if isinstance(item, NotBoundError):
                return
            self.value.append(item)
            if self.distinct:
                self.seen.add(item)
        # skip UNDEF
        # NOTE: It seems like this is not the way undefined values occur, they
        # come through not as exceptions but as values. This is left here
        # however as it may occur in some cases.
        # TODO: Consider removing this.
        except NotBoundError:
            return

    def get_value(self) -> Literal:
        """Return the collected values joined by the separator, as a literal."""
        joined = self.separator.join(map(str, self.value))
        return Literal(joined)
|
||||
|
||||
|
||||
class Aggregator:
    """Owns one accumulator per aggregate expression and feeds rows to them."""

    # algebra node name -> accumulator class implementing it
    accumulator_classes = {
        "Aggregate_Count": Counter,
        "Aggregate_Sample": Sample,
        "Aggregate_Sum": Sum,
        "Aggregate_Avg": Average,
        "Aggregate_Min": Minimum,
        "Aggregate_Max": Maximum,
        "Aggregate_GroupConcat": GroupConcat,
    }

    def __init__(self, aggregations: List[CompValue]):
        """Create one accumulator per aggregation, keyed by its result variable.

        Raises ``Exception`` for an aggregation whose name is not registered
        in ``accumulator_classes``.
        """
        self.bindings: Dict[Variable, Identifier] = {}
        self.accumulators: Dict[str, Accumulator] = {}
        for aggregation in aggregations:
            factory = self.accumulator_classes.get(aggregation.name)
            if factory is None:
                raise Exception("Unknown aggregate function " + aggregation.name)
            self.accumulators[aggregation.res] = factory(aggregation)

    def update(self, row: FrozenBindings) -> None:
        """Pass ``row`` to every accumulator that accepts it."""
        # SAMPLE accumulators may delete themselves
        # => iterate over a snapshot, not the live view
        snapshot = tuple(self.accumulators.values())
        for accumulator in snapshot:
            if accumulator.use_row(row):
                accumulator.update(row, self)

    def get_bindings(self) -> Mapping[Variable, Identifier]:
        """Let the remaining accumulators store their final values, then
        return the bindings."""
        for accumulator in self.accumulators.values():
            accumulator.set_value(self.bindings)
        return self.bindings
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Utility functions for supporting the XML Schema Datatypes hierarchy
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set
|
||||
|
||||
from rdflib.namespace import XSD
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.term import URIRef
|
||||
|
||||
|
||||
# XSD datatypes listed by this module.
XSD_DTs: Set[URIRef] = {
    XSD.integer,
    XSD.decimal,
    XSD.float,
    XSD.double,
    XSD.string,
    XSD.boolean,
    XSD.dateTime,
    XSD.nonPositiveInteger,
    XSD.negativeInteger,
    XSD.long,
    XSD.int,
    XSD.short,
    XSD.byte,
    XSD.nonNegativeInteger,
    XSD.unsignedLong,
    XSD.unsignedInt,
    XSD.unsignedShort,
    XSD.unsignedByte,
    XSD.positiveInteger,
    XSD.date,
}

# adding dateTime datatypes

XSD_DateTime_DTs = {XSD.dateTime, XSD.date, XSD.time}

XSD_Duration_DTs = {XSD.duration, XSD.dayTimeDuration, XSD.yearMonthDuration}
|
||||
|
||||
# Super-type -> list of datatypes treated as its sub-types.  Only
# xsd:integer has sub-types listed here.
_sub_types: Dict[URIRef, List[URIRef]] = {
    XSD.integer: [
        XSD.nonPositiveInteger,
        XSD.negativeInteger,
        XSD.long,
        XSD.int,
        XSD.short,
        XSD.byte,
        XSD.nonNegativeInteger,
        XSD.positiveInteger,
        XSD.unsignedLong,
        XSD.unsignedInt,
        XSD.unsignedShort,
        XSD.unsignedByte,
    ],
}

# Reverse lookup (sub-type -> super-type) derived from _sub_types for every
# datatype in XSD_DTs; type_promotion() uses it to normalise its arguments.
_super_types: Dict[URIRef, URIRef] = {}
for superdt in XSD_DTs:
    for subdt in _sub_types.get(superdt, []):
        _super_types[subdt] = superdt
|
||||
|
||||
# we only care about float, double, integer, decimal
|
||||
# Promotion table: _typePromotionMap[t1][t2] is the datatype produced when
# t1 and t2 are combined.  Pairs with t1 == t2 are absent because
# type_promotion() returns early for equal types.
_typePromotionMap: Dict[URIRef, Dict[URIRef, URIRef]] = {
    XSD.float: {XSD.integer: XSD.float, XSD.decimal: XSD.float, XSD.double: XSD.double},
    XSD.double: {
        XSD.integer: XSD.double,
        XSD.float: XSD.double,
        XSD.decimal: XSD.double,
    },
    XSD.decimal: {
        XSD.integer: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
    XSD.integer: {
        XSD.decimal: XSD.decimal,
        XSD.float: XSD.float,
        XSD.double: XSD.double,
    },
}
|
||||
|
||||
|
||||
def type_promotion(t1: URIRef, t2: Optional[URIRef]) -> URIRef:
    """Return the datatype that results from combining ``t1`` and ``t2``.

    ``t1`` is returned unchanged when ``t2`` is None.  Otherwise both types
    are replaced by their super-type (if one is registered) and looked up in
    the promotion table.

    :raises TypeError: if the table has no entry for the pair.
    """
    if t2 is None:
        return t1

    left = _super_types.get(t1, t1)
    right = _super_types.get(t2, t2)
    if left == right:
        return left  # matching super-types

    try:
        if TYPE_CHECKING:
            # type assert because mypy is confused and thinks t2 can be None
            assert right is not None
        promoted = _typePromotionMap[left][right]
    except KeyError:
        raise TypeError("Operators cannot combine datatypes %s and %s" % (left, right))
    return promoted
|
||||
@@ -0,0 +1,685 @@
|
||||
"""
|
||||
These methods recursively evaluate the SPARQL Algebra
|
||||
|
||||
evalQuery is the entry-point, it will setup context and
|
||||
return the SPARQLResult object
|
||||
|
||||
evalPart is called on each level and will delegate to the right method
|
||||
|
||||
A rdflib.plugins.sparql.sparql.QueryContext is passed along, keeping
|
||||
information needed for evaluation
|
||||
|
||||
A list of dicts (solution mappings) is returned, apart from GroupBy which may
|
||||
also return a dict of list of dicts
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import itertools
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Deque,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from pyparsing import ParseException
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql import CUSTOM_EVALS, parser
|
||||
from rdflib.plugins.sparql.aggregates import Aggregator
|
||||
from rdflib.plugins.sparql.evalutils import (
|
||||
_ebv,
|
||||
_eval,
|
||||
_fillTemplate,
|
||||
_join,
|
||||
_minus,
|
||||
_val,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, value
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
AlreadyBound,
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
Query,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
import json
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
# A triple pattern as consumed by evalBGP: three terms that evalBGP unpacks
# as (s, p, o).
_Triple = Tuple[Identifier, Identifier, Identifier]
|
||||
|
||||
|
||||
def evalBGP(
    ctx: QueryContext, bgp: List[_Triple]
) -> Generator[FrozenBindings, None, None]:
    """
    A basic graph pattern

    Matches the first triple pattern against ``ctx.graph``, binds its unbound
    positions, and recurses on the remaining patterns.  An empty pattern list
    yields the current solution.
    """
    if not bgp:
        yield ctx.solution()
        return

    s, p, o = bgp[0]
    remaining = bgp[1:]

    _s, _p, _o = ctx[s], ctx[p], ctx[o]
    # a fresh scope is only needed when at least one position gets bound
    needs_scope = None in (_s, _p, _o)

    # type error: Item "None" of "Optional[Graph]" has no attribute "triples"
    # type Argument 1 to "triples" of "Graph" has incompatible type "Tuple[Union[str, Path, None], Union[str, Path, None], Union[str, Path, None]]"; expected "Tuple[Optional[Node], Optional[Node], Optional[Node]]"
    for ss, sp, so in ctx.graph.triples((_s, _p, _o)):  # type: ignore[union-attr, arg-type]
        c = ctx.push() if needs_scope else ctx

        if _s is None:
            # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
            c[s] = ss  # type: ignore[assignment]

        try:
            if _p is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[p] = sp  # type: ignore[assignment]
            if _o is None:
                # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier")
                c[o] = so  # type: ignore[assignment]
        except AlreadyBound:
            # conflicting binding: this triple is not a match
            continue

        yield from evalBGP(c, remaining)
|
||||
|
||||
|
||||
def evalExtend(
    ctx: QueryContext, extend: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate ``extend.p`` and add ``extend.var`` bound to the value of
    ``extend.expr`` to each solution.

    A solution whose expression produces a ``SPARQLError`` is yielded
    unchanged.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY

    for solution in evalPart(ctx, extend.p):
        try:
            scoped = solution.forget(ctx, _except=extend._vars)
            result = _eval(extend.expr, scoped)
            if isinstance(result, SPARQLError):
                # errors returned as values are handled like raised ones
                raise result

            yield solution.merge({extend.var: result})

        except SPARQLError:
            yield solution
|
||||
|
||||
|
||||
def evalLazyJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """
    A lazy join will push the variables bound
    in the first part to the second part,
    essentially doing the join implicitly
    hopefully evaluating much fewer triples
    """
    for left in evalPart(ctx, join.p1):
        seeded_ctx = ctx.thaw(left)
        for right in evalPart(seeded_ctx, join.p2):
            # merge, as some bindings may have been forgotten
            yield right.merge(left)
|
||||
|
||||
|
||||
def evalJoin(ctx: QueryContext, join: CompValue) -> Generator[FrozenDict, None, None]:
    """Join the solutions of ``join.p1`` and ``join.p2``.

    Delegates to ``evalLazyJoin`` when ``join.lazy`` is set; otherwise the
    right-hand side is collected into a set and passed to ``_join``.
    """
    # TODO: Deal with dict returned from evalPart from GROUP BY
    # only ever for join.p1

    if join.lazy:
        return evalLazyJoin(ctx, join)

    left = evalPart(ctx, join.p1)
    right = set(evalPart(ctx, join.p2))
    return _join(left, right)
|
||||
|
||||
|
||||
def evalUnion(ctx: QueryContext, union: CompValue) -> List[Any]:
    """Return all solutions of ``union.p1`` followed by all of ``union.p2``."""
    combined = list(evalPart(ctx, union.p1))
    combined.extend(evalPart(ctx, union.p2))
    return combined
|
||||
|
||||
|
||||
def evalMinus(ctx: QueryContext, minus: CompValue) -> Generator[FrozenDict, None, None]:
    """Evaluate both operands and hand them to ``_minus``; the right-hand
    side is collected into a set first."""
    kept = evalPart(ctx, minus.p1)
    removed = set(evalPart(ctx, minus.p2))
    return _minus(kept, removed)
|
||||
|
||||
|
||||
def evalLeftJoin(
    ctx: QueryContext, join: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate an OPTIONAL-style left join.

    For each left solution, every right solution passing ``join.expr`` is
    yielded.  When none passes, the left solution itself is yielded, after
    the scope check described below.
    """
    for a in evalPart(ctx, join.p1):
        matched = False
        seeded_ctx = ctx.thaw(a)
        for b in evalPart(seeded_ctx, join.p2):
            if _ebv(join.expr, b.forget(ctx)):
                matched = True
                yield b
        if matched:
            continue

        # we've cheated, the ctx above may contain
        # vars bound outside our scope
        # before we yield a solution without the OPTIONAL part
        # check that we would have had no OPTIONAL matches
        # even without prior bindings...
        p1_vars = join.p1._vars
        if p1_vars is None:
            yield a
            continue

        rescoped = evalPart(ctx.thaw(a.remember(p1_vars)), join.p2)
        if not any(_ebv(join.expr, b) for b in rescoped):
            yield a
|
||||
|
||||
|
||||
def evalFilter(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield the solutions of ``part.p`` for which ``part.expr`` is true.

    Unless ``part.no_isolated_scope`` is set, the expression is evaluated
    against the solution with bindings forgotten except ``part._vars``.
    """
    # TODO: Deal with dict returned from evalPart!
    for solution in evalPart(ctx, part.p):
        if part.no_isolated_scope:
            scope = solution
        else:
            scope = solution.forget(ctx, _except=part._vars)
        if _ebv(part.expr, scope):
            yield solution
|
||||
|
||||
|
||||
def evalGraph(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Evaluate a GRAPH pattern.

    With a bound graph term, ``part.p`` is evaluated against that graph.
    With an unbound term, it is evaluated against every context of the
    dataset except the default one, and the term is bound to each graph's
    identifier.  The previous graph is restored on every yielded solution.

    Raises ``Exception`` when the context has no dataset.
    """
    if ctx.dataset is None:
        raise Exception(
            "Non-conjunctive-graph doesn't know about "
            + "graphs. Try a query without GRAPH."
        )

    ctx = ctx.clone()
    graph: Union[str, Path, None, Graph] = ctx[part.term]
    prev_graph = ctx.graph

    if graph is not None:
        if TYPE_CHECKING:
            assert not isinstance(graph, Graph)
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Union[str, Path]"; expected "Union[Node, str, None]"
        c = ctx.pushGraph(ctx.dataset.get_context(graph))  # type: ignore[arg-type]
        for x in evalPart(c, part.p):
            x.ctx.graph = prev_graph
            yield x
        return

    for graph in ctx.dataset.contexts():
        # in SPARQL the default graph is NOT a named graph
        if graph == ctx.dataset.default_context:
            continue

        c = ctx.pushGraph(graph).push()
        graphSolution = [{part.term: graph.identifier}]
        for x in _join(evalPart(c, part.p), graphSolution):
            x.ctx.graph = prev_graph
            yield x
|
||||
|
||||
|
||||
def evalValues(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Yield one solution per row of ``part.p.res``.

    ``"UNDEF"`` entries are left unbound; a row that conflicts with an
    existing binding is dropped.
    """
    for values_row in part.p.res:
        scoped = ctx.push()
        try:
            for var, term in values_row.items():
                if term != "UNDEF":
                    scoped[var] = term
        except AlreadyBound:
            continue
        else:
            yield scoped.solution()
|
||||
|
||||
|
||||
def evalMultiset(ctx: QueryContext, part: CompValue):
    """Evaluate a ToMultiSet node: inline VALUES go to ``evalValues``,
    anything else is evaluated as an ordinary part."""
    inner = part.p
    if inner.name != "values":
        return evalPart(ctx, inner)
    return evalValues(ctx, part)
|
||||
|
||||
|
||||
def evalPart(ctx: QueryContext, part: CompValue) -> Any:
    """Evaluate one algebra node by dispatching on ``part.name``.

    Registered custom evaluation functions are tried first, in registry
    order; one that raises ``NotImplementedError`` is skipped.  Raises
    ``Exception`` for a node name with no evaluator.
    """
    # try custom evaluation functions
    for custom_eval in CUSTOM_EVALS.values():
        try:
            return custom_eval(ctx, part)
        except NotImplementedError:
            # the given custom function did not handle this part
            continue

    kind = part.name

    if kind == "BGP":
        # Reorder triple patterns by the number of unbound nodes in the
        # current ctx, so patterns with more bound nodes come first
        triples = sorted(
            part.triples, key=lambda t: sum(1 for n in t if ctx[n] is None)
        )
        return evalBGP(ctx, triples)
    if kind == "Filter":
        return evalFilter(ctx, part)
    if kind == "Join":
        return evalJoin(ctx, part)
    if kind == "LeftJoin":
        return evalLeftJoin(ctx, part)
    if kind == "Graph":
        return evalGraph(ctx, part)
    if kind == "Union":
        return evalUnion(ctx, part)
    if kind == "ToMultiSet":
        return evalMultiset(ctx, part)
    if kind == "Extend":
        return evalExtend(ctx, part)
    if kind == "Minus":
        return evalMinus(ctx, part)

    if kind == "Project":
        return evalProject(ctx, part)
    if kind == "Slice":
        return evalSlice(ctx, part)
    if kind == "Distinct":
        return evalDistinct(ctx, part)
    if kind == "Reduced":
        return evalReduced(ctx, part)

    if kind == "OrderBy":
        return evalOrderBy(ctx, part)
    if kind == "Group":
        return evalGroup(ctx, part)
    if kind == "AggregateJoin":
        return evalAggregateJoin(ctx, part)

    if kind == "SelectQuery":
        return evalSelectQuery(ctx, part)
    if kind == "AskQuery":
        return evalAskQuery(ctx, part)
    if kind == "ConstructQuery":
        return evalConstructQuery(ctx, part)

    if kind == "ServiceGraphPattern":
        return evalServiceQuery(ctx, part)

    if kind == "DescribeQuery":
        return evalDescribeQuery(ctx, part)

    raise Exception("I dont know: %s" % part.name)
|
||||
|
||||
|
||||
def evalServiceQuery(ctx: QueryContext, part: CompValue):
    """Evaluate a SERVICE pattern by querying the remote endpoint.

    The endpoint URL and the inner query are taken from
    ``part["service_string"]``.  The query is sent with GET when it is short
    and POST otherwise, and the JSON result is turned into bindings.

    :param ctx: current query context, used to build the remote query and
        the resulting bindings.
    :param part: the ServiceGraphPattern node.
    :return: a generator of bindings; it yields nothing when the service
        string does not have the expected ``service <url> { ... }`` shape.
    :raises Exception: if the endpoint answers with a status other than 200.
    """
    match = re.match(
        "^service <(.*)>[ \n]*{(.*)}[ \n]*$",
        # type error: Argument 2 to "get" of "CompValue" has incompatible type "str"; expected "bool"  [arg-type]
        part.get("service_string", ""),  # type: ignore[arg-type]
        re.DOTALL | re.I,
    )
    if not match:
        return

    service_url = match.group(1)
    service_query = _buildQueryStringForServiceCall(ctx, match.group(2))

    encoded_settings = urlencode({"query": service_query, "output": "json"})
    headers = {
        "accept": "application/sparql-results+json",
        "user-agent": "rdflibForAnUser",
    }
    # GET is easier to cache so prefer that if the query is not too long
    if len(service_query) < 600:
        request = Request(service_url + "?" + encoded_settings, headers=headers)
    else:
        request = Request(
            service_url, data=encoded_settings.encode(), headers=headers
        )

    # The context manager closes the connection even when reading fails.
    with urlopen(request) as response:
        if response.status != 200:
            raise Exception(
                "Service: %s responded with code: %s"
                % (service_url, response.status)
            )
        payload = response.read()

    json_dict = orjson.loads(payload) if _HAS_ORJSON else json.loads(payload)
    variables = json_dict["head"]["vars"]
    for r in json_dict["results"]["bindings"]:
        # type error: Argument 2 to "_yieldBindingsFromServiceCallResult" has incompatible type "str"; expected "Dict[str, Dict[str, str]]"
        yield from _yieldBindingsFromServiceCallResult(ctx, r, variables)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
"""
|
||||
Build a query string to be used by the service call.
|
||||
It is supposed to pass in the existing bound solutions.
|
||||
Re-adds prefixes if added and sets the base.
|
||||
Wraps it in select if needed.
|
||||
"""
|
||||
|
||||
|
||||
def _buildQueryStringForServiceCall(ctx: QueryContext, service_query: str) -> str:
|
||||
try:
|
||||
parser.parseQuery(service_query)
|
||||
except ParseException:
|
||||
# This could be because we don't have a select around the service call.
|
||||
service_query = "SELECT REDUCED * WHERE {" + service_query + "}"
|
||||
# type error: Item "None" of "Optional[Prologue]" has no attribute "namespace_manager"
|
||||
for p in ctx.prologue.namespace_manager.store.namespaces(): # type: ignore[union-attr]
|
||||
service_query = "PREFIX " + p[0] + ":" + p[1].n3() + " " + service_query
|
||||
# re add the base if one was defined
|
||||
# type error: Item "None" of "Optional[Prologue]" has no attribute "base"
|
||||
base = ctx.prologue.base # type: ignore[union-attr]
|
||||
if base is not None and len(base) > 0:
|
||||
service_query = "BASE <" + base + "> " + service_query
|
||||
sol = [v for v in ctx.solution() if isinstance(v, Variable)]
|
||||
if len(sol) > 0:
|
||||
variables = " ".join([v.n3() for v in sol])
|
||||
variables_bound = " ".join([ctx.get(v).n3() for v in sol])
|
||||
service_query = (
|
||||
service_query + "VALUES (" + variables + ") {(" + variables_bound + ")}"
|
||||
)
|
||||
return service_query
|
||||
|
||||
|
||||
def _yieldBindingsFromServiceCallResult(
    ctx: QueryContext, r: Dict[str, Dict[str, str]], variables: List[str]
) -> Generator[FrozenBindings, None, None]:
    """Convert one JSON result row into a single ``FrozenBindings``.

    Variables that are missing from ``r`` or have an empty binding are left
    unbound.  Raises ``ValueError`` for a binding type other than ``uri``,
    ``literal``, ``typed-literal`` or ``bnode``.
    """
    res_dict: Dict[Variable, Identifier] = {}
    for var in variables:
        if var not in r or not r[var]:
            continue

        binding = r[var]
        kind = binding["type"]
        term: Identifier
        if kind == "uri":
            term = URIRef(binding["value"])
        elif kind == "literal":
            term = Literal(
                binding["value"],
                datatype=binding.get("datatype"),
                lang=binding.get("xml:lang"),
            )
        # This is here because of
        # https://www.w3.org/TR/2006/NOTE-rdf-sparql-json-res-20061004/#variable-binding-results
        elif kind == "typed-literal":
            term = Literal(binding["value"], datatype=URIRef(binding["datatype"]))
        elif kind == "bnode":
            term = BNode(binding["value"])
        else:
            raise ValueError(f"invalid type {kind!r} for variable {var!r}")
        res_dict[Variable(var)] = term

    yield FrozenBindings(ctx, res_dict)
|
||||
|
||||
|
||||
def evalGroup(ctx: QueryContext, group: CompValue):
    """
    http://www.w3.org/TR/sparql11-query/#defn_algGroup

    Only evaluates the wrapped pattern; no grouping is done here.
    """
    # grouping should be implemented by evalAggregateJoin
    wrapped = group.p
    return evalPart(ctx, wrapped)
|
||||
|
||||
|
||||
def evalAggregateJoin(
    ctx: QueryContext, agg: CompValue
) -> Generator[FrozenBindings, None, None]:
    """Group the rows of ``agg.p`` and yield one aggregated binding per group.

    Without a group expression all rows go to a single aggregator.  When no
    aggregator was created at all, one empty binding is yielded.
    """
    rows = evalPart(ctx, agg.p)

    group_expr = agg.p.expr
    # group key -> Aggregator, created on first access
    aggregators: Dict[Any, Any] = collections.defaultdict(
        lambda: Aggregator(aggregations=agg.A)
    )

    if group_expr is not None:
        for row in rows:
            # determine right group aggregator for row
            key = tuple(_eval(e, row, False) for e in group_expr)
            aggregators[key].update(row)
    else:
        # no grouping, just COUNT in SELECT clause
        # get 1 aggregator for counting
        single = aggregators[True]
        for row in rows:
            single.update(row)

    # all rows are done; yield aggregated values
    for aggregator in aggregators.values():
        yield FrozenBindings(ctx, aggregator.get_bindings())

    # there were no matches
    if not aggregators:
        yield FrozenBindings(ctx)
|
||||
|
||||
|
||||
def evalOrderBy(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """
    Evaluate an OrderBy node.

    The order conditions are applied last-to-first with a stable sort, so
    the first condition ends up as the primary sort key.  The result is the
    fully sorted list of solutions.
    """
    res = evalPart(ctx, part.p)

    for cond in reversed(part.expr):
        descending = bool(cond.order and cond.order == "DESC")

        def sort_key(row, cond=cond):
            # unbound variables are returned as-is (variables=True) and
            # ranked by _val
            return _val(value(row, cond.expr, variables=True))

        res = sorted(res, key=sort_key, reverse=descending)

    return res
|
||||
|
||||
|
||||
def evalSlice(ctx: QueryContext, slice: CompValue):
    """
    Evaluate a Slice node (OFFSET / LIMIT).

    Returns a lazy ``itertools.islice`` over the solutions of ``slice.p``,
    starting at ``slice.start`` and, when ``slice.length`` is set, stopping
    ``slice.length`` items later.
    """
    res = evalPart(ctx, slice.p)

    start = slice.start
    if slice.length is not None:
        stop = slice.start + slice.length
    else:
        stop = None

    return itertools.islice(res, start, stop)
|
||||
|
||||
|
||||
def evalReduced(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """apply REDUCED to result

    REDUCED is not as strict as DISTINCT, but if the incoming rows were sorted
    it should produce the same result with limited extra memory and time per
    incoming row.
    """

    # This implementation uses a most recently used strategy and a limited
    # buffer size. It relates to a LRU caching algorithm:
    # https://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used_.28LRU.29
    buffer_size = 1
    # TODO: add configuration or determine "best" size for most use cases
    # 0: No reduction
    # 1: compare only with the last row, almost no reduction with
    #    unordered incoming rows
    # N: The greater the buffer size the greater the reduction but more
    #    memory and time are needed

    # mixed data structure: set for lookup, deque for append/pop/remove
    remembered = set()
    recency: Deque[Any] = collections.deque()

    for row in evalPart(ctx, part.p):
        if row not in remembered:
            # row seems to be new
            yield row
            remembered.add(row)
            if len(remembered) > buffer_size:
                # drop the least recently used row from buffer
                remembered.remove(recency.pop())
        else:
            # forget last position of row
            recency.remove(row)
        # put row to the front
        recency.appendleft(row)
|
||||
|
||||
|
||||
def evalDistinct(
    ctx: QueryContext, part: CompValue
) -> Generator[FrozenBindings, None, None]:
    """
    Evaluate a Distinct node.

    Yields each solution of ``part.p`` the first time it is seen, keeping
    every solution yielded so far in a set.
    """
    seen = set()
    for solution in evalPart(ctx, part.p):
        if solution in seen:
            continue
        yield solution
        seen.add(solution)
|
||||
|
||||
|
||||
def evalProject(ctx: QueryContext, project: CompValue):
    """
    Evaluate a Project node.

    Returns a lazy generator that calls ``row.project(project.PV)`` on each
    solution of ``project.p``.
    """
    solutions = evalPart(ctx, project.p)
    return (solution.project(project.PV) for solution in solutions)
|
||||
|
||||
|
||||
def evalSelectQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, List[Variable], Iterable[FrozenDict]]]:
    """
    Evaluate a SELECT query.

    Returns a result dict with ``type_`` set to ``"SELECT"``, ``bindings``
    set to the evaluated pattern and ``vars_`` set to ``query.PV``.
    """
    res: Dict[str, Union[str, List[Variable], Iterable[FrozenDict]]] = {
        "type_": "SELECT",
        "bindings": evalPart(ctx, query.p),
        "vars_": query.PV,
    }
    return res
|
||||
|
||||
|
||||
def evalAskQuery(ctx: QueryContext, query: CompValue) -> Mapping[str, Union[str, bool]]:
    """
    Evaluate an ASK query.

    ``askAnswer`` is True as soon as the pattern produces one solution; no
    further solutions are requested.
    """
    nothing = object()
    first = next(iter(evalPart(ctx, query.p)), nothing)

    res: Dict[str, Union[bool, str]] = {
        "type_": "ASK",
        "askAnswer": first is not nothing,
    }
    return res
|
||||
|
||||
|
||||
def evalConstructQuery(
    ctx: QueryContext, query: CompValue
) -> Mapping[str, Union[str, Graph]]:
    """
    Evaluate a CONSTRUCT query.

    Each solution of ``query.p`` is used to instantiate the template, and
    the resulting triples are added to a fresh ``Graph``.
    """
    # a construct-where query has no template of its own:
    # query->project->bgp ...
    template = query.template or query.p.p.triples

    graph = Graph()
    for solution in evalPart(ctx, query.p):
        graph += _fillTemplate(template, solution)

    res: Dict[str, Union[str, Graph]] = {"type_": "CONSTRUCT", "graph": graph}
    return res
|
||||
|
||||
|
||||
def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]:
    """
    Evaluate a DESCRIBE query.

    Collects the resources to describe (explicit IRIs in ``query.PV`` plus
    every value bound by the WHERE clause, if there is one) and copies the
    ``cbd`` of each from the queried graph into a new result graph.
    """
    # Create a result graph and bind namespaces from the graph being queried
    graph = Graph()
    # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces"
    for prefix, namespace in ctx.graph.namespaces():  # type: ignore[union-attr]
        graph.bind(prefix, namespace)

    # Explicit IRIs may be provided to a DESCRIBE query.
    # If there is a WHERE clause, explicit IRIs may be provided in
    # addition to projected variables. Find those explicit IRIs and
    # prepare to describe them.
    to_describe = {term for term in query.PV if isinstance(term, URIRef)}

    # If there is a WHERE clause, evaluate it then find the unique set of
    # resources to describe across all bindings and projected variables
    if query.p is not None:
        for binding in evalPart(ctx, query.p):
            to_describe.update(binding.values())

    # Get a CBD for all resources identified to describe
    for resource in to_describe:
        # type error: Item "None" of "Optional[Graph]" has no attribute "cbd"
        ctx.graph.cbd(resource, target_graph=graph)  # type: ignore[union-attr]

    res: Dict[str, Union[str, Graph]] = {"type_": "DESCRIBE", "graph": graph}
    return res
|
||||
|
||||
|
||||
def evalQuery(
    graph: Graph,
    query: Query,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    base: Optional[str] = None,
) -> Mapping[Any, Any]:
    """
    Evaluate a translated query against ``graph``.

    The keys of ``initBindings`` are wrapped in ``Variable`` and used as the
    initial bindings of a new ``QueryContext``; the query's algebra is then
    evaluated in that context.  ``base`` is accepted but not read in this
    function body.

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.
    """
    main = query.algebra

    variable_bindings = {
        Variable(name): term for name, term in (initBindings or {}).items()
    }

    ctx = QueryContext(
        graph, initBindings=variable_bindings, datasetClause=main.datasetClause
    )
    ctx.prologue = query.prologue

    return evalPart(ctx, main)
|
||||
@@ -0,0 +1,188 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
from typing import (
|
||||
Any,
|
||||
DefaultDict,
|
||||
Generator,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Set,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.operators import EBV
|
||||
from rdflib.plugins.sparql.parserutils import CompValue, Expr
|
||||
from rdflib.plugins.sparql.sparql import (
|
||||
FrozenBindings,
|
||||
FrozenDict,
|
||||
NotBoundError,
|
||||
QueryContext,
|
||||
SPARQLError,
|
||||
)
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
_ContextType = Union[FrozenBindings, QueryContext]
|
||||
_FrozenDictT = TypeVar("_FrozenDictT", bound=FrozenDict)
|
||||
|
||||
|
||||
def _diff(
    a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT], expr
) -> Set[_FrozenDictT]:
    """
    Return the solutions of ``a`` that have no match in ``b``.

    A solution ``x`` is kept when, for every ``y`` in ``b``, either ``x`` is
    not compatible with ``y`` or ``expr`` is not true (per ``_ebv``) for the
    merged solution.

    ``b`` is scanned once per element of ``a``; a one-shot iterator is
    therefore materialized first so it is not exhausted after the first
    element.
    """
    if iter(b) is b:
        # b is its own iterator (e.g. a generator): it can be consumed only
        # once, so keep its items around for the repeated scans below.
        b = list(b)

    res = set()
    for x in a:
        if all(not x.compatible(y) or not _ebv(expr, x.merge(y)) for y in b):
            res.add(x)

    return res
|
||||
|
||||
|
||||
def _minus(
|
||||
a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT]
|
||||
) -> Generator[_FrozenDictT, None, None]:
|
||||
for x in a:
|
||||
if all((not x.compatible(y)) or x.disjointDomain(y) for y in b):
|
||||
yield x
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenBindings], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenBindings, None, None]: ...
|
||||
|
||||
|
||||
@overload
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]: ...
|
||||
|
||||
|
||||
def _join(
|
||||
a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]]
|
||||
) -> Generator[FrozenDict, None, None]:
|
||||
for x in a:
|
||||
for y in b:
|
||||
if x.compatible(y):
|
||||
yield x.merge(y)
|
||||
|
||||
|
||||
def _ebv(expr: Union[Literal, Variable, Expr], ctx: FrozenDict) -> bool:
    """
    Return true/false for the given expr

    Either the expr is itself true/false
    or evaluates to something, with the given ctx

    an error is false
    """

    try:
        return EBV(expr)
    except SPARQLError:
        # expr has no effective boolean value by itself; try evaluating it
        pass
    if isinstance(expr, Expr):
        try:
            return EBV(expr.eval(ctx))
        except SPARQLError:
            return False  # filter error == False
    # type error: Subclass of "Literal" and "CompValue" cannot exist: would have incompatible method signatures
    elif isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - filter got a CompValue without evalfn! %r" % expr)
    elif isinstance(expr, Variable):
        try:
            return EBV(ctx[expr])
        except Exception:
            # Any lookup or EBV failure counts as false; unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            return False
    return False
|
||||
|
||||
|
||||
@overload
def _eval(
    expr: Union[Literal, URIRef],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Literal, URIRef]: ...


@overload
def _eval(
    expr: Union[Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = ...,
) -> Union[Any, SPARQLError]: ...


def _eval(
    expr: Union[Literal, URIRef, Variable, Expr],
    ctx: FrozenBindings,
    raise_not_bound_error: bool = True,
) -> Any:
    """
    Evaluate ``expr`` in the context ``ctx``.

    Literals and URIRefs are returned unchanged, ``Expr`` objects are
    evaluated, and variables are looked up in ``ctx``.  For an unbound
    variable either ``NotBoundError`` is raised or, when
    ``raise_not_bound_error`` is false, ``None`` is returned.  Anything else
    raises ``Exception``.
    """
    if isinstance(expr, (Literal, URIRef)):
        return expr

    if isinstance(expr, Expr):
        return expr.eval(ctx)

    if isinstance(expr, Variable):
        try:
            return ctx[expr]
        except KeyError:
            if not raise_not_bound_error:
                return None
            raise NotBoundError("Variable %s is not bound" % expr)

    if isinstance(expr, CompValue):  # type: ignore[unreachable]
        raise Exception("Weird - _eval got a CompValue without evalfn! %r" % expr)

    raise Exception("Cannot eval thing: %s (%s)" % (expr, type(expr)))
|
||||
|
||||
|
||||
def _filter(
    a: Iterable[FrozenDict], expr: Union[Literal, Variable, Expr]
) -> Generator[FrozenDict, None, None]:
    """Yield the solutions of ``a`` for which ``expr`` is true per ``_ebv``."""
    yield from (solution for solution in a if _ebv(expr, solution))
|
||||
|
||||
|
||||
def _fillTemplate(
    template: Iterable[Tuple[Identifier, Identifier, Identifier]],
    solution: _ContextType,
) -> Generator[Tuple[Identifier, Identifier, Identifier], None, None]:
    """
    For construct/deleteWhere and friends

    Fill a triple template with instantiated variables.  Template bnodes are
    replaced by new bnodes (one per distinct template bnode within this
    call); triples with any unresolved position are dropped.
    """

    fresh_bnodes: DefaultDict[BNode, BNode] = collections.defaultdict(BNode)

    for pattern in template:
        s, p, o = pattern
        looked_up = [solution.get(term) for term in (s, p, o)]

        # instantiate new bnodes for each solution
        triple = tuple(
            fresh_bnodes[term] if isinstance(term, BNode) else found
            for term, found in zip(pattern, looked_up)
        )

        if all(term is not None for term in triple):
            yield triple
|
||||
|
||||
|
||||
_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal)
|
||||
|
||||
|
||||
def _val(v: _ValueT) -> Tuple[int, _ValueT]:
    """
    Utility for ordering things.

    Returns ``(rank, v)`` where rank is 0 for a Variable, 1 for a BNode,
    2 for a URIRef and 3 for a Literal.  For any other type the function
    falls through and returns None.
    """
    for rank, term_type in enumerate((Variable, BNode, URIRef, Literal)):
        if isinstance(v, term_type):
            return (rank, v)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
|
||||
NOTE: PyParsing setResultName/__call__ provides a very similar solution to this
|
||||
I didn't realise at the time of writing and I will remove a
|
||||
lot of this code at some point
|
||||
|
||||
Utility classes for creating an abstract-syntax tree out of pyparsing actions
|
||||
|
||||
Lets you label and group parts of parser production rules
|
||||
|
||||
For example:
|
||||
|
||||
# [5] BaseDecl ::= 'BASE' IRIREF
|
||||
BaseDecl = Comp('Base', Keyword('BASE') + Param('iri',IRIREF))
|
||||
|
||||
After parsing, this gives you back a CompValue object,
|
||||
which is a dict/object with the parameters specified.
|
||||
So you can access the parameters as attributes or as keys:
|
||||
|
||||
baseDecl.iri
|
||||
|
||||
Comp lets you set an evalFn that is bound to the eval method of
|
||||
the resulting CompValue
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import OrderedDict
|
||||
from types import MethodType
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pyparsing import ParserElement, ParseResults, TokenConverter, originalTextFor
|
||||
|
||||
from rdflib.term import BNode, Identifier, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.plugins.sparql.sparql import FrozenBindings
|
||||
|
||||
|
||||
# This is an alternative
|
||||
|
||||
# Comp('Sum')( Param('x')(Number) + '+' + Param('y')(Number) )
|
||||
|
||||
|
||||
def value(
    ctx: FrozenBindings,
    val: Any,
    variables: bool = False,
    errors: bool = False,
) -> Any:
    """
    utility function for evaluating something...

    Variables will be looked up in the context
    Normally, non-bound vars is an error,
    set variables=True to return unbound vars

    Normally, an error raises the error,
    set errors=True to return error

    """

    if isinstance(val, Expr):
        return val.eval(ctx)  # recurse?

    if isinstance(val, CompValue):
        raise Exception("What do I do with this CompValue? %s" % val)

    if isinstance(val, list):
        return [value(ctx, item, variables, errors) for item in val]

    if isinstance(val, (BNode, Variable)):
        bound = ctx.get(val)
        if isinstance(bound, SPARQLError) and not errors:
            raise bound
        if bound is not None:
            return bound

        # not bound
        if not variables:
            raise NotBoundError
        return val

    if isinstance(val, ParseResults) and len(val) == 1:
        return value(ctx, val[0], variables, errors)

    return val
|
||||
|
||||
|
||||
class ParamValue:
    """
    The result of parsing a Param
    This just keeps the name/value
    All cleverness is in the CompValue
    """

    def __init__(
        self, name: str, tokenList: Union[List[Any], ParseResults], isList: bool
    ):
        self.isList = isList
        self.name = name

        # a single-element list / ParseResults is unwrapped to its element
        is_sequence = isinstance(tokenList, (list, ParseResults))
        self.tokenList = (
            tokenList[0] if is_sequence and len(tokenList) == 1 else tokenList
        )

    def __str__(self) -> str:
        return "Param(%s, %s)" % (self.name, self.tokenList)
|
||||
|
||||
|
||||
class Param(TokenConverter):
    """
    A pyparsing token for labelling a part of the parse-tree
    if isList is true repeat occurrences of ParamList have
    their values merged in a list
    """

    def __init__(self, name: str, expr, isList: bool = False):
        self.isList = isList
        super().__init__(expr)
        self.setName(name)
        self.addParseAction(self.postParse2)

    def postParse2(self, tokenList: Union[List[Any], ParseResults]) -> ParamValue:
        """Parse action: wrap the matched tokens in a ``ParamValue``."""
        return ParamValue(self.name, tokenList, self.isList)
|
||||
|
||||
|
||||
class ParamList(Param):
    """
    A shortcut for a Param with isList=True
    """

    def __init__(self, name: str, expr):
        super().__init__(name, expr, isList=True)
|
||||
|
||||
|
||||
_ValT = TypeVar("_ValT")
|
||||
|
||||
|
||||
class CompValue(OrderedDict):
    """
    The result of parsing a Comp
    Any included Params are available as Dict keys
    or as attributes

    While ``self.ctx`` is set to something other than None, values read via
    item/attribute access or ``get`` are passed through ``value()`` with
    that context; otherwise the stored value is returned as-is.
    """

    def __init__(self, name: str, **values):
        OrderedDict.__init__(self)
        self.name = name
        self.update(values)

    def clone(self) -> CompValue:
        """Return a new ``CompValue`` with the same name and items."""
        return CompValue(self.name, **self)

    def __str__(self) -> str:
        # OrderedDict does not define __str__, so delegating to it ends up in
        # object.__str__, which calls our own __repr__ and would prefix the
        # name a second time.  Build the string from dict's repr directly.
        return self.name + "_" + dict.__repr__(self)

    def __repr__(self) -> str:
        return self.name + "_" + dict.__repr__(self)

    def _value(
        self, val: _ValT, variables: bool = False, errors: bool = False
    ) -> Union[_ValT, Any]:
        """
        Resolve ``val`` against ``self.ctx`` when a context is set.

        ``variables`` and ``errors`` are forwarded to ``value()``.
        """
        if self.ctx is not None:
            return value(self.ctx, val, variables, errors)
        else:
            return val

    def __getitem__(self, a):
        return self._value(OrderedDict.__getitem__(self, a))

    # type error: Signature of "get" incompatible with supertype "dict"
    # type error: Signature of "get" incompatible with supertype "Mapping"  [override]
    def get(self, a, variables: bool = False, errors: bool = False):  # type: ignore[override]
        # NB: a missing key falls back to the key itself, not to None
        return self._value(OrderedDict.get(self, a, a), variables, errors)

    def __getattr__(self, a: str) -> Any:
        # Hack hack: OrderedDict relies on this
        if a in ("_OrderedDict__root", "_OrderedDict__end"):
            raise AttributeError()
        try:
            return self[a]
        except KeyError:
            # raise AttributeError('no such attribute '+a)
            # unknown attributes (including an unset ``ctx``) read as None
            return None

    if TYPE_CHECKING:
        # this is here because properties are dynamically set on CompValue
        def __setattr__(self, __name: str, __value: Any) -> None: ...
|
||||
|
||||
|
||||
class Expr(CompValue):
    """
    A CompValue that is evaluatable
    """

    def __init__(
        self,
        name: str,
        evalfn: Optional[Callable[[Any, Any], Any]] = None,
        **values,
    ):
        super().__init__(name, **values)

        # bind evalfn to this instance so it receives the Expr as first argument
        self._evalfn = MethodType(evalfn, self) if evalfn else None

    def eval(self, ctx: Any = {}) -> Union[SPARQLError, Any]:
        """
        Evaluate this expression in ``ctx``.

        ``self.ctx`` is set for the duration of the call and reset to None
        afterwards.  A ``SPARQLError`` raised by the evaluation function is
        returned rather than propagated.
        """
        try:
            self.ctx: Optional[Union[Mapping, FrozenBindings]] = ctx
            # type error: "None" not callable
            result = self._evalfn(ctx)  # type: ignore[misc]
        except SPARQLError as error:
            return error
        else:
            return result
        finally:
            self.ctx = None
|
||||
|
||||
|
||||
class Comp(TokenConverter):
    """
    A pyparsing token for grouping together things with a label
    Any sub-tokens that are not Params will be ignored.

    Returns CompValue / Expr objects - depending on whether evalFn is set.
    """

    def __init__(self, name: str, expr: ParserElement):
        self.expr = expr
        super().__init__(expr)
        self.setName(name)
        self.evalfn: Optional[Callable[[Any, Any], Any]] = None

    def postParse(
        self, instring: str, loc: int, tokenList: ParseResults
    ) -> Union[Expr, CompValue]:
        """Collect the ``ParamValue`` tokens into a new Expr / CompValue."""
        res: Union[Expr, CompValue]
        if self.evalfn:
            res = Expr(self.name)
            res._evalfn = MethodType(self.evalfn, res)
        else:
            res = CompValue(self.name)
            if self.name == "ServiceGraphPattern":
                # Then this must be a service graph pattern and have
                # already matched.
                # lets assume there is one, for now, then test for two later.
                original_text = originalTextFor(self.expr)
                res["service_string"] = original_text.searchString(instring)[0][0]

        for token in tokenList:
            if not isinstance(token, ParamValue):
                # sub-tokens that are not Params are ignored
                continue
            if not token.isList:
                res[token.name] = token.tokenList
                continue
            # list params accumulate their values across occurrences
            if token.name not in res:
                res[token.name] = []
            res[token.name].append(token.tokenList)
        return res

    def setEvalFn(self, evalfn: Callable[[Any, Any], Any]) -> Comp:
        """Set the evaluation function and return ``self`` for chaining."""
        self.evalfn = evalfn
        return self
|
||||
|
||||
|
||||
def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str:
    """
    Render a ``ParseResults`` tree as indented text.

    The list items come first, followed by the named results sorted by name.
    """
    pieces: List[str] = [
        _prettify_sub_parsetree(element, indent, depth + 1) for element in t.asList()
    ]
    for key, sub in sorted(t.items()):
        pieces.append("%s%s- %s:\n" % (indent, " " * depth, key))
        pieces.append(_prettify_sub_parsetree(sub, indent, depth + 1))
    return "".join(pieces)
|
||||
|
||||
|
||||
def _prettify_sub_parsetree(
    t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None],
    indent: str = "",
    depth: int = 0,
) -> str:
    """
    Render one node of a parse tree as indented text.

    A CompValue gets a ``> name:`` header followed by its items, a plain
    dict just its items, a list its elements, and anything else its repr.
    """
    pieces: List[str] = []
    if isinstance(t, dict):
        # CompValue is a dict subclass: same item listing, plus a header line
        if isinstance(t, CompValue):
            pieces.append("%s%s> %s:\n" % (indent, " " * depth, t.name))
        for key, sub in t.items():
            pieces.append("%s%s- %s:\n" % (indent, " " * (depth + 1), key))
            pieces.append(_prettify_sub_parsetree(sub, indent, depth + 2))
    elif isinstance(t, list):
        pieces.extend(
            _prettify_sub_parsetree(element, indent, depth + 1) for element in t
        )
    else:
        pieces.append("%s%s- %r\n" % (indent, " " * depth, t))
    return "".join(pieces)
|
||||
|
||||
|
||||
# hurrah for circular imports
|
||||
from rdflib.plugins.sparql.sparql import NotBoundError, SPARQLError # noqa: E402
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Code for tying SPARQL Engine into RDFLib
|
||||
|
||||
These should be automatically registered with RDFLib
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Mapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate
|
||||
from rdflib.plugins.sparql.evaluate import evalQuery
|
||||
from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
|
||||
from rdflib.plugins.sparql.sparql import Query, Update
|
||||
from rdflib.plugins.sparql.update import evalUpdate
|
||||
from rdflib.query import Processor, Result, UpdateProcessor
|
||||
from rdflib.term import Identifier
|
||||
|
||||
|
||||
def prepareQuery(
    queryString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Query:
    """
    Parse and translate a SPARQL Query

    The arguments used are recorded on the result as ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    parsed = parseQuery(queryString)
    prepared = translateQuery(parsed, base, namespaces)
    prepared._original_args = (queryString, namespaces, base)
    return prepared
|
||||
|
||||
|
||||
def prepareUpdate(
    updateString: str,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> Update:
    """
    Parse and translate a SPARQL Update

    The arguments used are recorded on the result as ``_original_args``.
    """
    namespaces = {} if initNs is None else initNs
    parsed = parseUpdate(updateString)
    prepared = translateUpdate(parsed, base, namespaces)
    prepared._original_args = (updateString, namespaces, base)
    return prepared
|
||||
|
||||
|
||||
def processUpdate(
    graph: Graph,
    updateString: str,
    initBindings: Optional[Mapping[str, Identifier]] = None,
    initNs: Optional[Mapping[str, Any]] = None,
    base: Optional[str] = None,
) -> None:
    """
    Process a SPARQL Update Request
    returns Nothing on success or raises Exceptions on error
    """
    parsed = parseUpdate(updateString)
    update = translateUpdate(parsed, base, initNs)
    evalUpdate(graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLResult(Result):
    """A ``Result`` populated from the dict returned by query evaluation."""

    def __init__(self, res: Mapping[str, Any]):
        # "type_" is required; the remaining keys are optional and read as None
        super().__init__(res["type_"])
        self.vars = res.get("vars_")
        # type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "MutableSequence[Mapping[Variable, Identifier]]")
        self.bindings = res.get("bindings")  # type: ignore[assignment]
        self.askAnswer = res.get("askAnswer")
        self.graph = res.get("graph")
|
||||
|
||||
|
||||
class SPARQLUpdateProcessor(UpdateProcessor):
    """Update processor that runs SPARQL Update requests against a graph."""

    def __init__(self, graph):
        self.graph = graph

    def update(
        self,
        strOrQuery: Union[str, Update],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
    ) -> None:
        """
        Evaluate an update against ``self.graph``.

        A string is parsed and translated first (using ``initNs``); an
        already translated ``Update`` is evaluated as given.

        .. caution::

            This method can access indirectly requested network endpoints, for
            example, query processing will attempt to access network endpoints
            specified in ``SERVICE`` directives.

            When processing untrusted or potentially malicious queries, measures
            should be taken to restrict network and file access.

            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
        """

        update = strOrQuery
        if isinstance(update, str):
            update = translateUpdate(parseUpdate(update), initNs=initNs)

        return evalUpdate(self.graph, update, initBindings)
|
||||
|
||||
|
||||
class SPARQLProcessor(Processor):
    """Query processor that runs SPARQL queries against a graph."""

    def __init__(self, graph):
        self.graph = graph

    # NOTE on type error: this is because the super type constructor does not
    # accept base argument and the position of the DEBUG argument is
    # different.
    # type error: Signature of "query" incompatible with supertype "Processor"
    def query(  # type: ignore[override]
        self,
        strOrQuery: Union[str, Query],
        initBindings: Optional[Mapping[str, Identifier]] = None,
        initNs: Optional[Mapping[str, Any]] = None,
        base: Optional[str] = None,
        DEBUG: bool = False,
    ) -> Mapping[str, Any]:
        """
        Evaluate a query with the given initial bindings, and initial
        namespaces. The given base is used to resolve relative URIs in
        the query and will be overridden by any BASE given in the query.

        .. caution::

            This method can access indirectly requested network endpoints, for
            example, query processing will attempt to access network endpoints
            specified in ``SERVICE`` directives.

            When processing untrusted or potentially malicious queries, measures
            should be taken to restrict network and file access.

            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
        """

        query = strOrQuery
        if isinstance(query, str):
            # a string is parsed and translated first
            query = translateQuery(parseQuery(query), base, initNs)

        return evalQuery(self.graph, query, initBindings, base)
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
Parsers and serializers for SPARQL Result formats
|
||||
"""
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
|
||||
This module implements a parser and serializer for the CSV SPARQL result
|
||||
formats
|
||||
|
||||
http://www.w3.org/TR/sparql11-results-csv-tsv/
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import csv
|
||||
from io import BufferedIOBase, TextIOBase
|
||||
from typing import IO, Dict, List, Optional, Union, cast
|
||||
|
||||
from rdflib.plugins.sparql.processor import SPARQLResult
|
||||
from rdflib.query import Result, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
|
||||
class CSVResultParser(ResultParser):
    """Parser for SPARQL SELECT results in CSV form."""

    def __init__(self):
        self.delim = ","

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """
        Read ``source`` as CSV: the first row gives the variable names and
        every further row becomes one binding.
        """
        result = Result("SELECT")

        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]")
            source = codecs.getreader("utf-8")(source)  # type: ignore[assignment]

        rows = csv.reader(source, delimiter=self.delim)
        result.vars = [Variable(name) for name in next(rows)]
        result.bindings = []

        for row in rows:
            result.bindings.append(self.parseRow(row, result.vars))

        return result

    def parseRow(
        self, row: List[str], v: List[Variable]
    ) -> Dict[Variable, Union[BNode, URIRef, Literal]]:
        """Map variables to the converted cells of ``row``, skipping empty cells."""
        terms = [self.convertTerm(cell) for cell in row]
        return {var: term for var, term in zip(v, terms) if term is not None}

    def convertTerm(self, t: str) -> Optional[Union[BNode, URIRef, Literal]]:
        """
        Convert one CSV cell to an RDF term.

        An empty cell gives None, a ``_:`` prefix a BNode, an ``http://`` or
        ``https://`` prefix a URIRef, and anything else a Literal.
        """
        if t == "":
            return None
        if t.startswith("_:"):
            return BNode(t)  # or generate new IDs?
        if t.startswith(("http://", "https://")):  # TODO: more?
            return URIRef(t)
        return Literal(t)
|
||||
|
||||
|
||||
class CSVResultSerializer(ResultSerializer):
    """Serializer writing SPARQL SELECT results as CSV."""

    def __init__(self, result: SPARQLResult):
        super().__init__(result)

        self.delim = ","
        if result.type != "SELECT":
            raise Exception("CSVSerializer can only serialize select query results")

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs) -> None:
        """
        Write a header row with the variables, then one row per binding.

        A text stream is written to directly; any other stream is wrapped in
        a ``codecs`` writer for ``encoding``.
        """
        # the serialiser writes bytes in the given encoding
        # in py3 csv.writer is unicode aware and writes STRINGS,
        # so we encode afterward

        # TODO: Find a better solution for all this casting
        writable_stream = cast(Union[TextIOBase, BufferedIOBase], stream)
        string_stream: TextIOBase
        if not isinstance(writable_stream, TextIOBase):
            byte_stream = cast(BufferedIOBase, writable_stream)
            string_stream = cast(TextIOBase, codecs.getwriter(encoding)(byte_stream))
        else:
            string_stream = writable_stream

        writer = csv.writer(string_stream, delimiter=self.delim)

        header = [self.serializeTerm(var, encoding) for var in self.result.vars]  # type: ignore[union-attr]
        writer.writerow(header)
        for binding in self.result.bindings:
            cells = [
                self.serializeTerm(binding.get(var), encoding)
                for var in self.result.vars  # type: ignore[union-attr]
            ]
            writer.writerow(cells)

    def serializeTerm(
        self, term: Optional[Identifier], encoding: str
    ) -> Union[str, Identifier]:
        """
        Return the CSV cell for ``term``: an empty string for None, ``_:``
        plus the id for a BNode, and the term itself otherwise.
        """
        if term is None:
            return ""
        if isinstance(term, BNode):
            return f"_:{term}"
        return term
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Optional
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.query import Result, ResultParser
|
||||
|
||||
|
||||
class GraphResultParser(ResultParser):
    """Parser that loads a graph-valued query result."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str]) -> Result:  # type: ignore[override]
        """Parse ``source`` into a new Graph, using ``content_type`` as the format."""
        result = Result("CONSTRUCT")  # hmm - or describe?type_)
        graph = Graph()
        result.graph = graph
        graph.parse(source, format=content_type)

        return result
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
"""A Serializer for SPARQL results in JSON:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-json-res/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import IO, Any, Dict, Mapping, MutableSequence, Optional
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
try:
|
||||
import orjson
|
||||
|
||||
_HAS_ORJSON = True
|
||||
except ImportError:
|
||||
orjson = None # type: ignore[assignment, unused-ignore]
|
||||
_HAS_ORJSON = False
|
||||
|
||||
|
||||
class JSONResultParser(ResultParser):
    """Parser for SPARQL results serialized as JSON."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """Read all of ``source`` and build a ``JSONResult`` from it.

        With orjson available, any decoding failure is re-raised as
        ``ResultException``. Otherwise bytes input is decoded as UTF-8 and
        handed to ``json.loads``, whose errors propagate unchanged.
        ``content_type`` is not used.
        """
        payload = source.read()
        if not _HAS_ORJSON:
            text = payload.decode("utf-8") if isinstance(payload, bytes) else payload
            return JSONResult(json.loads(text))
        try:
            document = orjson.loads(payload)
        except Exception as e:
            raise ResultException(f"Failed to parse result: {e}")
        return JSONResult(document)
|
||||
|
||||
|
||||
class JSONResultSerializer(ResultSerializer):
    """Serializer that writes a query result as a SPARQL JSON document."""

    def __init__(self, result: Result):
        ResultSerializer.__init__(self, result)

    # type error: Signature of "serialize" incompatible with supertype "ResultSerializer"
    def serialize(self, stream: IO, encoding: Optional[str] = None) -> None:  # type: ignore[override]
        """Write the result to ``stream`` as JSON.

        ASK results produce ``head`` and ``boolean``; every other result
        type is written like a SELECT, with ``results`` and ``head``.

        When ``encoding`` is given, bytes are written first and the text
        form is written instead if the stream rejects them with
        ``TypeError`` or ``ValueError``. When ``encoding`` is ``None`` text
        is always written.

        :raises ResultException: if orjson is in use and fails to serialize.
        """
        res: Dict[str, Any] = {}
        if self.result.type == "ASK":
            res["head"] = {}
            res["boolean"] = self.result.askAnswer
        else:
            # select
            # NOTE: insertion order ("results" before "head") determines the
            # key order of the emitted document, so it is kept as is.
            res["results"] = {}
            res["head"] = {}
            res["head"]["vars"] = self.result.vars
            res["results"]["bindings"] = [
                self._bindingToJSON(x) for x in self.result.bindings
            ]
        if _HAS_ORJSON:
            try:
                r_bytes = orjson.dumps(res, option=orjson.OPT_NON_STR_KEYS)
            except Exception as e:
                raise ResultException(f"Failed to serialize result: {e}") from e
            if encoding is not None:
                # Note, orjson will always write utf-8 even if
                # encoding is specified as something else.
                try:
                    stream.write(r_bytes)
                except (TypeError, ValueError):
                    stream.write(r_bytes.decode("utf-8"))
            else:
                stream.write(r_bytes.decode("utf-8"))
        else:
            r_str = json.dumps(res, allow_nan=False, ensure_ascii=False)
            if encoding is not None:
                try:
                    stream.write(r_str.encode(encoding))
                except (TypeError, ValueError):
                    stream.write(r_str)
            else:
                stream.write(r_str)

    def _bindingToJSON(self, b: Mapping[Variable, Identifier]) -> Dict[Variable, Any]:
        """Convert one solution mapping into its JSON dictionary form.

        Variables whose term converts to ``None`` (unbound) are omitted.
        """
        res = {}
        for var in b:
            # Convert once and reuse the value instead of calling
            # termToJSON a second time for the same term.
            j = termToJSON(self, b[var])
            if j is not None:
                res[var] = j
        return res
|
||||
|
||||
|
||||
class JSONResult(Result):
    """A ``Result`` built from an already decoded SPARQL JSON document."""

    def __init__(self, json: Dict[str, Any]):
        """Classify the document as ASK or SELECT and populate the result.

        A document with a ``boolean`` key is an ASK result; otherwise one
        with a ``results`` key is a SELECT result.

        :raises ResultException: if neither key is present.
        """
        self.json = json
        is_ask = "boolean" in json
        if not is_ask and "results" not in json:
            raise ResultException("No boolean or results in json!")

        Result.__init__(self, "ASK" if is_ask else "SELECT")

        if is_ask:
            self.askAnswer = bool(json["boolean"])
            return

        self.bindings = self._get_bindings()
        self.vars = [Variable(name) for name in json["head"]["vars"]]

    def _get_bindings(self) -> MutableSequence[Mapping[Variable, Identifier]]:
        """Turn every row under ``results.bindings`` into a variable-to-term dict."""
        rows: MutableSequence[Mapping[Variable, Identifier]] = []
        for raw_row in self.json["results"]["bindings"]:
            converted: Dict[Variable, Identifier] = {}
            for name, encoded_term in raw_row.items():
                # Convert the term before building the key, as the original
                # assignment did.
                term = parseJsonTerm(encoded_term)
                converted[Variable(name)] = term
            rows.append(converted)
        return rows
|
||||
|
||||
|
||||
def parseJsonTerm(d: Dict[str, str]) -> Identifier:
    """Build the rdflib term (Literal, URIRef, BNode) described by a JSON dict.

    Example inputs:
       { 'type': 'uri', 'value': 'http://famegame.com/2006/01/username' }
       { 'type': 'literal', 'value': 'drewp' }

    The ``type`` entry selects the term class: ``uri``, ``literal``,
    ``typed-literal`` or ``bnode``. A ``literal`` may carry optional
    ``datatype`` and ``xml:lang`` entries; a ``typed-literal`` must carry
    ``datatype``.

    :raises NotImplementedError: for any other ``type`` value.
    """
    kind = d["type"]
    if kind == "uri":
        return URIRef(d["value"])
    if kind == "bnode":
        return BNode(d["value"])
    if kind == "typed-literal":
        return Literal(d["value"], datatype=URIRef(d["datatype"]))
    if kind == "literal":
        return Literal(d["value"], datatype=d.get("datatype"), lang=d.get("xml:lang"))
    raise NotImplementedError("json term type %r" % kind)
|
||||
|
||||
|
||||
def termToJSON(
    self: JSONResultSerializer, term: Optional[Identifier]
) -> Optional[Dict[str, str]]:
    """Return the JSON dictionary form of ``term``, or ``None`` if unbound.

    URIs and blank nodes yield ``type`` and ``value``; literals additionally
    carry ``datatype`` and/or ``xml:lang`` when those are set on the term.
    The ``self`` argument is not used.

    :raises ResultException: if ``term`` is of an unsupported type.
    """
    if term is None:
        return None
    if isinstance(term, URIRef):
        return {"type": "uri", "value": str(term)}
    if isinstance(term, BNode):
        return {"type": "bnode", "value": str(term)}
    if not isinstance(term, Literal):
        raise ResultException("Unknown term type: %s (%s)" % (term, type(term)))

    encoded = {"type": "literal", "value": str(term)}
    if term.datatype is not None:
        encoded["datatype"] = str(term.datatype)
    if term.language is not None:
        encoded["xml:lang"] = term.language
    return encoded
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, Any, MutableMapping, Optional, Union
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.namespace import RDF, Namespace
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import Node, Variable
|
||||
|
||||
# Namespace of the result-set vocabulary; the code below reads terms such as
# RS.ResultSet, RS.boolean, RS.solution and RS.binding from it.
RS = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/result-set#")
|
||||
|
||||
|
||||
class RDFResultParser(ResultParser):
    """Parser for query results that are themselves expressed as RDF."""

    def parse(self, source: Union[IO, Graph], **kwargs: Any) -> Result:
        """Build an ``RDFResult`` from ``source``, forwarding ``kwargs`` to it."""
        parsed = RDFResult(source, **kwargs)
        return parsed
|
||||
|
||||
|
||||
class RDFResult(Result):
    """A ``Result`` read from an RDF description of a result set.

    If the graph contains no node typed ``RS.ResultSet`` the whole graph is
    treated as a CONSTRUCT result. Otherwise the result set is an ASK result
    when it has an ``RS.boolean`` value and a SELECT result when it does not.
    """

    def __init__(self, source: Union[IO, Graph], **kwargs: Any):
        """Load ``source`` (parsing it unless it is already a ``Graph``).

        ``kwargs`` are forwarded to ``Graph.parse`` when parsing is needed.

        :raises Exception: for an ASK result whose boolean has no value.
        """
        if isinstance(source, Graph):
            graph = source
        else:
            graph = Graph()
            graph.parse(source, **kwargs)

        # A single result-set node is looked up; the original author assumes
        # there is only one.
        rs = graph.value(predicate=RDF.type, object=RS.ResultSet)

        if rs is None:
            type_ = "CONSTRUCT"
            # Copy the triples into a new graph rather than reusing the input.
            g = Graph()
            g += graph
        else:
            askAnswer = graph.value(rs, RS.boolean)
            type_ = "SELECT" if askAnswer is None else "ASK"

        Result.__init__(self, type_)

        if type_ == "CONSTRUCT":
            self.graph = g
        elif type_ == "ASK":
            # type error: Item "Node" of "Optional[Node]" has no attribute "value"
            # type error: Item "None" of "Optional[Node]" has no attribute "value"
            answer = askAnswer.value  # type: ignore[union-attr]
            self.askAnswer = answer
            if answer is None:
                raise Exception("Malformed boolean in ask answer!")
        elif type_ == "SELECT":
            # type error: Argument 1 to "Variable" has incompatible type "Node"; expected "str"
            self.vars = [Variable(v) for v in graph.objects(rs, RS.resultVariable)]  # type: ignore[arg-type]

            self.bindings = []

            for solution in graph.objects(rs, RS.solution):
                sol: MutableMapping[Variable, Optional[Node]] = {}
                for binding in graph.objects(solution, RS.binding):
                    # Fetch the value before building the key, as the
                    # original assignment did.
                    bound = graph.value(binding, RS.value)
                    # type error: Argument 1 to "Variable" has incompatible type "Optional[Node]"; expected "str"
                    sol[Variable(graph.value(binding, RS.variable))] = bound  # type: ignore[arg-type]
                # error: Argument 1 to "append" of "list" has incompatible type "MutableMapping[Variable, Optional[Node]]"; expected "Mapping[Variable, Identifier]"
                self.bindings.append(sol)  # type: ignore[arg-type]
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
This implements the Tab Separated SPARQL Result Format
|
||||
|
||||
It is implemented with pyparsing, reusing the elements from the SPARQL Parser
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import typing
|
||||
from typing import IO, Union
|
||||
|
||||
from pyparsing import (
|
||||
FollowedBy,
|
||||
LineEnd,
|
||||
Literal,
|
||||
Optional,
|
||||
ParserElement,
|
||||
Suppress,
|
||||
ZeroOrMore,
|
||||
)
|
||||
|
||||
from rdflib.plugins.sparql.parser import (
|
||||
BLANK_NODE_LABEL,
|
||||
IRIREF,
|
||||
LANGTAG,
|
||||
STRING_LITERAL1,
|
||||
STRING_LITERAL2,
|
||||
BooleanLiteral,
|
||||
NumericLiteral,
|
||||
Var,
|
||||
)
|
||||
from rdflib.plugins.sparql.parserutils import Comp, CompValue, Param
|
||||
from rdflib.query import Result, ResultParser
|
||||
from rdflib.term import BNode, URIRef
|
||||
from rdflib.term import Literal as RDFLiteral
|
||||
|
||||
# Only spaces and newlines are treated as skippable whitespace, so tab
# characters stay significant as column separators.
# NOTE(review): this sets a pyparsing-wide default at import time - confirm
# it does not affect grammars built elsewhere after this module is imported.
ParserElement.setDefaultWhitespaceChars(" \n")

# A quoted string in either of the two single-line quoting styles.
String = STRING_LITERAL1 | STRING_LITERAL2

# A literal: a string optionally followed directly (no whitespace) by a
# language tag or by "^^" and a datatype IRI.
RDFLITERAL = Comp(
    "literal",
    Param("string", String)
    + Optional(
        Param("lang", LANGTAG.leaveWhitespace())
        | Literal("^^").leaveWhitespace() + Param("datatype", IRIREF).leaveWhitespace()
    ),
)

# Sentinel produced for an empty cell; convertTerm maps it to None.
NONE_VALUE = object()

# An empty cell matches without consuming input when the next thing is the
# end of the line or a tab.
EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

# Any single value that may appear in a data cell.
TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

# A data row: one or more tab-separated cells, each empty or a term.
ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

# The header row: one or more tab-separated variables.
HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
|
||||
|
||||
|
||||
class TSVResultParser(ResultParser):
    """Parser for SELECT results in the tab separated values format."""

    # type error: Signature of "parse" incompatible with supertype "ResultParser"  [override]
    def parse(self, source: IO, content_type: typing.Optional[str] = None) -> Result:  # type: ignore[override]
        """Read a header line and the data rows from ``source``.

        A source that yields bytes is wrapped in a UTF-8 decoding reader.
        The first line gives the variables; every following non-empty line
        becomes one binding dict pairing those variables with the parsed
        cells. ``content_type`` is not used.
        """
        reader = source
        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            reader = codecs.getreader("utf-8")(source)

        result = Result("SELECT")

        header_line = reader.readline()
        result.vars = list(HEADER.parseString(header_line.strip(), parseAll=True))
        result.bindings = []

        # An empty read marks the end of the input.
        while raw_line := reader.readline():
            content = raw_line.strip("\n")
            if content == "":
                continue

            cells = ROW.parseString(content, parseAll=True)
            converted = (self.convertTerm(cell) for cell in cells)
            # type error: Generator has incompatible item type "object"; expected "Identifier"
            result.bindings.append(dict(zip(result.vars, converted)))  # type: ignore[misc]

        return result

    def convertTerm(
        self, t: Union[object, RDFLiteral, BNode, CompValue, URIRef]
    ) -> typing.Optional[Union[object, BNode, URIRef, RDFLiteral]]:
        """Map one parsed cell to its final value.

        The empty-cell sentinel becomes ``None``, a parsed ``literal``
        component becomes an rdflib literal, and anything that is not a
        ``CompValue`` is returned unchanged.

        :raises Exception: for a ``CompValue`` that is not a literal.
        """
        if t is NONE_VALUE:
            return None
        if not isinstance(t, CompValue):
            return t
        if t.name != "literal":
            raise Exception("I dont know how to handle this: %s" % (t,))
        return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from typing import IO, List, Optional, Union
|
||||
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.query import ResultSerializer
|
||||
from rdflib.term import BNode, Literal, URIRef, Variable
|
||||
|
||||
|
||||
def _termString(
|
||||
t: Optional[Union[URIRef, Literal, BNode]],
|
||||
namespace_manager: Optional[NamespaceManager],
|
||||
) -> str:
|
||||
if t is None:
|
||||
return "-"
|
||||
if namespace_manager:
|
||||
if isinstance(t, URIRef):
|
||||
return namespace_manager.normalizeUri(t)
|
||||
elif isinstance(t, BNode):
|
||||
return t.n3()
|
||||
elif isinstance(t, Literal):
|
||||
return t._literal_n3(qname_callback=namespace_manager.normalizeUri)
|
||||
else:
|
||||
return t.n3()
|
||||
|
||||
|
||||
class TXTResultSerializer(ResultSerializer):
    """
    A write-only QueryResult serializer for text/ascii tables
    """

    def serialize(
        self,
        stream: IO,
        encoding: str = "utf-8",
        *,
        namespace_manager: Optional[NamespaceManager] = None,
        **kwargs,
    ) -> None:
        """
        Write a text table of the query results to ``stream``.

        The table has a header line of centred variable names, a line of
        dashes, and the rows in sorted order, left-justified to the widest
        cell value in each column. A result without rows is written as
        ``(no results)``. The encoded bytes are written first; if the stream
        rejects them with ``TypeError`` or ``ValueError`` the text is
        written instead.

        :raises Exception: if the result is not a SELECT result.
        """

        def centered(text, width):
            """
            Centre ``text`` in ``width`` columns; an odd spare column goes
            to the right.
            """
            spare = width - len(text)
            left, odd = divmod(spare, 2)
            return " " * left + text + " " * (left + odd)

        if self.result.type != "SELECT":
            raise Exception("Can only pretty print SELECT results!")

        if not self.result:
            text_val = "(no results)\n"
        else:
            keys: List[Variable] = self.result.vars  # type: ignore[assignment]
            table = [
                # type error: Value of type "Union[Tuple[Node, Node, Node], bool, ResultRow]" is not indexable
                # type error: Argument 1 to "_termString" has incompatible type "Union[Node, Any]"; expected "Union[URIRef, Literal, BNode, None]"  [arg-type]
                # type error: No overload variant of "__getitem__" of "tuple" matches argument type "Variable"
                # NOTE on type error: a row can be of more types than _termString expects because a result can be of several kinds.
                [_termString(row[key], namespace_manager) for key in keys]  # type: ignore[index, arg-type, call-overload]
                for row in self.result
            ]
            # Column widths come from the cell values only, not the headers.
            widths = [
                max((len(cells[col]) for cells in table), default=0)
                for col in range(len(keys))
            ]
            lines = [
                "|".join([centered(key, widths[col]) for col, key in enumerate(keys)]),
                "-" * (len(widths) + sum(widths)),
            ]
            lines.extend(
                "|".join([cell.ljust(width) for width, cell in zip(widths, cells)])
                for cells in sorted(table)
            )
            text_val = "".join(line + "\n" for line in lines)

        try:
            stream.write(text_val.encode(encoding))
        except (TypeError, ValueError):
            stream.write(text_val)
|
||||
+301
@@ -0,0 +1,301 @@
|
||||
"""A Parser for SPARQL results in XML:
|
||||
|
||||
http://www.w3.org/TR/rdf-sparql-XMLres/
|
||||
|
||||
Bits and pieces borrowed from:
|
||||
http://projects.bigasterisk.com/sparqlhttp/
|
||||
|
||||
Authors: Drew Perttula, Gunnar Aastrand Grimnes
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as xml_etree # noqa: N813
|
||||
from io import BytesIO
|
||||
from typing import (
|
||||
IO,
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Dict,
|
||||
Optional,
|
||||
Sequence,
|
||||
TextIO,
|
||||
Tuple,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
from xml.dom import XML_NAMESPACE
|
||||
from xml.sax.saxutils import XMLGenerator
|
||||
from xml.sax.xmlreader import AttributesNSImpl
|
||||
|
||||
from rdflib.query import Result, ResultException, ResultParser, ResultSerializer
|
||||
from rdflib.term import BNode, Identifier, Literal, URIRef, Variable
|
||||
|
||||
try:
|
||||
# https://adamj.eu/tech/2021/12/29/python-type-hints-optional-imports/
|
||||
import lxml.etree as lxml_etree
|
||||
|
||||
FOUND_LXML = True
|
||||
except ImportError:
|
||||
FOUND_LXML = False
|
||||
|
||||
# Namespace URI used for every element of the SPARQL XML results document.
SPARQL_XML_NAMESPACE = "http://www.w3.org/2005/sparql-results#"
# The same namespace in "{uri}" form, prepended to local names when looking
# up or comparing element tags.
RESULTS_NS_ET = "{%s}" % SPARQL_XML_NAMESPACE

# Module-level logger named after this module.
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XMLResultParser(ResultParser):
    """Parser for SPARQL results in the XML results format."""

    # TODO FIXME: content_type should be a keyword only arg.
    def parse(self, source: IO, content_type: Optional[str] = None) -> Result:  # type: ignore[override]
        """Build an ``XMLResult`` from ``source``; ``content_type`` is not forwarded."""
        parsed = XMLResult(source)
        return parsed
|
||||
|
||||
|
||||
class XMLResult(Result):
    """A ``Result`` read from a SPARQL XML results document."""

    def __init__(self, source: IO, content_type: Optional[str] = None):
        """Parse ``source`` and populate an ASK or SELECT result.

        A source with an ``encoding`` attribute is treated as a text stream:
        it is read completely and re-encoded to UTF-8 bytes before parsing.
        lxml is used when it was importable, otherwise the standard library
        ElementTree. ``content_type`` is not used.

        :raises ResultException: if the document has neither a ``boolean``
            nor a ``results`` element.
        """
        parser_encoding: Optional[str] = None
        if not hasattr(source, "encoding"):
            if TYPE_CHECKING:
                assert isinstance(source, BinaryIO)
        else:
            if TYPE_CHECKING:
                assert isinstance(source, TextIO)
            parser_encoding = "utf-8"
            source = BytesIO(source.read().encode(parser_encoding))

        if FOUND_LXML:
            lxml_parser = lxml_etree.XMLParser(huge_tree=True, encoding=parser_encoding)
            tree = cast(
                xml_etree.ElementTree,
                lxml_etree.parse(source, parser=lxml_parser),
            )
        else:
            xml_parser = xml_etree.XMLParser(encoding=parser_encoding)
            tree = xml_etree.parse(source, parser=xml_parser)

        boolean = tree.find(RESULTS_NS_ET + "boolean")
        results = tree.find(RESULTS_NS_ET + "results")

        if boolean is None and results is None:
            raise ResultException("No RDF result-bindings or boolean answer found!")
        # A boolean element takes precedence over a results element.
        type_ = "ASK" if boolean is not None else "SELECT"

        Result.__init__(self, type_)

        if type_ != "SELECT":
            self.askAnswer = boolean.text.lower().strip() == "true"  # type: ignore[union-attr]
            return

        result_tag = f"{RESULTS_NS_ET}result"
        binding_tag = f"{RESULTS_NS_ET}binding"

        self.bindings = []
        for result in results:  # type: ignore[union-attr]
            # With lxml iteration also yields comments, and only "result"
            # elements should be processed.
            if result.tag != result_tag:
                continue
            row = {}
            for binding in result:
                # Same filtering one level down: skip comments and anything
                # that is not a "binding" element.
                if binding.tag != binding_tag:
                    continue
                # The term is parsed before the variable key is built.
                term = parseTerm(binding[0])
                # type error: error: Argument 1 to "Variable" has incompatible type "Union[str, None, Any]"; expected "str"
                # NOTE on type error: Element.get() can return None, and
                # this will invariably fail if passed into Variable
                # constructor as value
                row[Variable(binding.get("name"))] = term  # type: ignore[arg-type] # FIXME
            self.bindings.append(row)

        self.vars = [
            # type error: Argument 1 to "Variable" has incompatible type "Optional[str]"; expected "str"
            # NOTE on type error: Element.get() can return None, and this
            # will invariably fail if passed into Variable constructor as
            # value
            Variable(x.get("name"))  # type: ignore[arg-type] # FIXME
            for x in tree.findall(
                "./%shead/%svariable" % (RESULTS_NS_ET, RESULTS_NS_ET)
            )
        ]
|
||||
|
||||
|
||||
def parseTerm(element: xml_etree.Element) -> Union[URIRef, Literal, BNode]:
    """Build the rdflib term (Literal, URIRef, BNode) for a results element.

    ``uri`` and ``bnode`` elements use the element text directly. A
    ``literal`` element with no text becomes an empty literal; a non-empty
    ``datatype`` attribute takes precedence over ``xml:lang``.

    :raises TypeError: if the element is none of the three known kinds.
    """
    tag = element.tag
    text = element.text

    if tag == RESULTS_NS_ET + "uri":
        # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str"
        return URIRef(text)  # type: ignore[arg-type]
    if tag == RESULTS_NS_ET + "bnode":
        return BNode(text)
    if tag != RESULTS_NS_ET + "literal":
        raise TypeError("unknown binding type %r" % element)

    lexical = "" if text is None else text
    datatype = None
    lang = None
    if element.get("datatype", None):
        # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str"
        datatype = URIRef(element.get("datatype"))  # type: ignore[arg-type]
    elif element.get("{%s}lang" % XML_NAMESPACE, None):
        lang = element.get("{%s}lang" % XML_NAMESPACE)
    return Literal(lexical, datatype=datatype, lang=lang)
|
||||
|
||||
|
||||
class XMLResultSerializer(ResultSerializer):
    """Serializer that writes a query result as SPARQL results XML."""

    def __init__(self, result: Result):
        super().__init__(result)

    def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write the result to ``stream`` through a ``SPARQLXMLWriter``.

        An ASK result gets an empty header and its boolean answer. Any other
        result gets its variables as header followed by one ``result``
        element per binding row. Extra ``kwargs`` are ignored.
        """
        xml_writer = SPARQLXMLWriter(stream, encoding)

        if self.result.type == "ASK":
            xml_writer.write_header([])
            # type error: Argument 1 to "write_ask" of "SPARQLXMLWriter" has incompatible type "Optional[bool]"; expected "bool"
            xml_writer.write_ask(self.result.askAnswer)  # type: ignore[arg-type]
            xml_writer.close()
            return

        # type error: Argument 1 to "write_header" of "SPARQLXMLWriter" has incompatible type "Optional[List[Variable]]"; expected "Sequence[Variable]"
        xml_writer.write_header(self.result.vars)  # type: ignore[arg-type]
        xml_writer.write_results_header()
        for row in self.result.bindings:
            xml_writer.write_start_result()
            for variable, term in row.items():
                xml_writer.write_binding(variable, term)
            xml_writer.write_end_result()

        xml_writer.close()
|
||||
|
||||
|
||||
# TODO: Rewrite with ElementTree?
|
||||
class SPARQLXMLWriter:
    """
    Python saxutils-based SPARQL XML Writer

    The constructor immediately starts the document and opens the root
    ``sparql`` element; ``close`` ends whatever is still open. In between,
    callers write the header and then either an ASK answer or the results
    header followed by result/binding elements.
    """

    def __init__(self, output: IO, encoding: str = "utf-8"):
        """Start the document on ``output`` and open the ``sparql`` element."""
        writer = XMLGenerator(output, encoding)
        writer.startDocument()
        writer.startPrefixMapping("", SPARQL_XML_NAMESPACE)
        writer.startPrefixMapping("xml", XML_NAMESPACE)
        writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "sparql"), "sparql", AttributesNSImpl({}, {})
        )
        self.writer = writer
        self._output = output
        self._encoding = encoding
        # True once the "results" element has been opened, so close() knows
        # whether it has to end it.
        self._results = False
        # Initialised here so that write_binding()/write_end_result() called
        # before write_start_result() fail on their assertion rather than
        # with an AttributeError for a missing attribute.
        self._resultStarted = False

    def write_header(self, allvarsL: Sequence[Variable]) -> None:
        """Write the ``head`` element with one ``variable`` child per variable."""
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "head"), "head", AttributesNSImpl({}, {})
        )
        for var in allvarsL:
            attr_vals = {
                (None, "name"): str(var),
            }
            attr_qnames = {
                (None, "name"): "name",
            }
            self.writer.startElementNS(
                (SPARQL_XML_NAMESPACE, "variable"),
                "variable",
                # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]"
                # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]"  [arg-type]
                AttributesNSImpl(attr_vals, attr_qnames),  # type: ignore[arg-type]
            )
            self.writer.endElementNS((SPARQL_XML_NAMESPACE, "variable"), "variable")
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, "head"), "head")

    def write_ask(self, val: bool) -> None:
        """Write the ``boolean`` element holding ``val`` in lower case."""
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "boolean"), "boolean", AttributesNSImpl({}, {})
        )
        self.writer.characters(str(val).lower())
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, "boolean"), "boolean")

    def write_results_header(self) -> None:
        """Open the ``results`` element; it is ended by ``close``."""
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "results"), "results", AttributesNSImpl({}, {})
        )
        self._results = True

    def write_start_result(self) -> None:
        """Open a ``result`` element for one solution."""
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "result"), "result", AttributesNSImpl({}, {})
        )
        self._resultStarted = True

    def write_end_result(self) -> None:
        """End the ``result`` element opened by ``write_start_result``."""
        assert self._resultStarted
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, "result"), "result")
        self._resultStarted = False

    def write_binding(self, name: Variable, val: Identifier) -> None:
        """Write one ``binding`` element for variable ``name`` with term ``val``.

        URIs, blank nodes and literals are supported. For a literal the
        language tag, when present, is written as ``xml:lang``; otherwise a
        datatype, when present, is written as ``datatype``.

        :raises Exception: if ``val`` is any other kind of term.
        """
        assert self._resultStarted

        attr_vals: Dict[Tuple[Optional[str], str], str] = {
            (None, "name"): str(name),
        }
        attr_qnames: Dict[Tuple[Optional[str], str], str] = {
            (None, "name"): "name",
        }
        self.writer.startElementNS(
            (SPARQL_XML_NAMESPACE, "binding"),
            "binding",
            # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]"
            # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]"
            AttributesNSImpl(attr_vals, attr_qnames),  # type: ignore[arg-type, unused-ignore]
        )

        if isinstance(val, URIRef):
            self.writer.startElementNS(
                (SPARQL_XML_NAMESPACE, "uri"), "uri", AttributesNSImpl({}, {})
            )
            self.writer.characters(val)
            self.writer.endElementNS((SPARQL_XML_NAMESPACE, "uri"), "uri")
        elif isinstance(val, BNode):
            self.writer.startElementNS(
                (SPARQL_XML_NAMESPACE, "bnode"), "bnode", AttributesNSImpl({}, {})
            )
            self.writer.characters(val)
            self.writer.endElementNS((SPARQL_XML_NAMESPACE, "bnode"), "bnode")
        elif isinstance(val, Literal):
            # Start from empty attribute maps for the literal element.
            attr_vals = {}
            attr_qnames = {}
            if val.language:
                attr_vals[(XML_NAMESPACE, "lang")] = val.language
                attr_qnames[(XML_NAMESPACE, "lang")] = "xml:lang"
            elif val.datatype:
                attr_vals[(None, "datatype")] = val.datatype
                attr_qnames[(None, "datatype")] = "datatype"

            self.writer.startElementNS(
                (SPARQL_XML_NAMESPACE, "literal"),
                "literal",
                # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[Optional[str], str], str]"; expected "Mapping[Tuple[str, str], str]"
                # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[Optional[str], str], str]"; expected "Mapping[Tuple[str, str], str]"
                AttributesNSImpl(attr_vals, attr_qnames),  # type: ignore[arg-type, unused-ignore]
            )
            self.writer.characters(val)
            self.writer.endElementNS((SPARQL_XML_NAMESPACE, "literal"), "literal")

        else:
            raise Exception("Unsupported RDF term: %s" % val)

        self.writer.endElementNS((SPARQL_XML_NAMESPACE, "binding"), "binding")

    def close(self) -> None:
        """End ``results`` if it was opened, then ``sparql`` and the document."""
        if self._results:
            self.writer.endElementNS((SPARQL_XML_NAMESPACE, "results"), "results")
        self.writer.endElementNS((SPARQL_XML_NAMESPACE, "sparql"), "sparql")
        self.writer.endDocument()
|
||||
@@ -0,0 +1,499 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import itertools
|
||||
import typing as t
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Container,
|
||||
Dict,
|
||||
Generator,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
import rdflib.plugins.sparql
|
||||
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
|
||||
from rdflib.namespace import NamespaceManager
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from rdflib.paths import Path
|
||||
|
||||
|
||||
# Unconstrained type variable for generic annotations in this module.
_AnyT = TypeVar("_AnyT")
|
||||
|
||||
|
||||
class SPARQLError(Exception):
    """Base class of the SPARQL exceptions defined here."""

    def __init__(self, msg: Optional[str] = None):
        super().__init__(msg)


class NotBoundError(SPARQLError):
    """SPARQL error carrying an optional message."""

    def __init__(self, msg: Optional[str] = None):
        super().__init__(msg)


class AlreadyBound(SPARQLError):  # noqa: N818
    """Raised when trying to bind a variable that is already bound!"""

    def __init__(self):
        super().__init__()


class SPARQLTypeError(SPARQLError):
    """SPARQL error whose message argument is required."""

    def __init__(self, msg: Optional[str]):
        super().__init__(msg)
|
||||
|
||||
|
||||
class Bindings(MutableMapping):
    """
    A single level of a stack of variable-value bindings.

    Each level keeps a reference to the level below it (``outer``); any
    failed lookup is propagated to that outer level. Writes always go to
    this level only, and deletion is not supported.

    ``len()`` and iteration walk every level, so a key that is set on more
    than one level is counted and yielded once per level.

    In python 3.3 this could be a collections.ChainMap
    """

    def __init__(self, outer: Optional[Bindings] = None, d=()):
        """Create a level holding the items of ``d`` on top of ``outer``.

        ``d`` is anything ``dict()`` accepts; it is copied, never mutated.
        """
        self._d: Dict[str, str] = dict(d)
        self.outer = outer

    def __getitem__(self, key: str) -> str:
        """Return the value for ``key`` from the nearest level that has it.

        :raises KeyError: if no level binds ``key``.
        """
        if key in self._d:
            return self._d[key]

        # Testing for None avoids computing the length of the whole outer
        # chain on every lookup; an empty outer chain still ends in KeyError.
        if self.outer is None:
            raise KeyError(key)
        return self.outer[key]

    def __contains__(self, key: Any) -> bool:
        try:
            self[key]
            return True
        except KeyError:
            return False

    def __setitem__(self, key: str, value: Any) -> None:
        self._d[key] = value

    def __delitem__(self, key: str) -> None:
        raise Exception("DelItem is not implemented!")

    def __len__(self) -> int:
        i = 0
        d: Optional[Bindings] = self
        while d is not None:
            i += len(d._d)
            d = d.outer
        return i

    def __iter__(self) -> Generator[str, None, None]:
        d: Optional[Bindings] = self
        while d is not None:
            yield from d._d
            d = d.outer

    def __str__(self) -> str:
        # The items have to be formatted to text before joining; joining
        # (key, value) tuples raises TypeError for any non-empty bindings.
        return "Bindings({" + ", ".join(f"{k!r}: {self[k]!r}" for k in self) + "})"

    def __repr__(self) -> str:
        return str(self)
|
||||
|
||||
|
||||
class FrozenDict(Mapping):
    """
    An immutable hashable dict

    Taken from http://stackoverflow.com/a/2704866/81121

    The hash is computed lazily on first use and then cached.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        """Accept the same arguments as ``dict()`` and copy them."""
        self._d: Dict[Identifier, Identifier] = dict(*args, **kwargs)
        self._hash: Optional[int] = None

    def __iter__(self):
        return iter(self._d)

    def __len__(self) -> int:
        return len(self._d)

    def __getitem__(self, key: Identifier) -> Identifier:
        return self._d[key]

    def __hash__(self) -> int:
        # It would have been simpler and maybe more obvious to
        # use hash(tuple(sorted(self._d.items()))) from this discussion
        # so far, but this solution is O(n). I don't know what kind of
        # n we are going to run into, but sometimes it's hard to resist the
        # urge to optimize when it will gain improved algorithmic performance.
        if self._hash is None:
            # Accumulate in a local and cache only once every item has been
            # hashed; otherwise an unhashable item would leave a partial
            # value behind that later calls would return silently.
            combined = 0
            for key, value in self.items():
                combined ^= hash(key)
                combined ^= hash(value)
            self._hash = combined
        return self._hash

    def project(self, vars: Container[Variable]) -> FrozenDict:
        """Return a new FrozenDict with only the items whose key is in ``vars``."""
        return FrozenDict(x for x in self.items() if x[0] in vars)

    def disjointDomain(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return True if this mapping and ``other`` have no key in common."""
        return not bool(set(self).intersection(other))

    def compatible(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        """Return True unless a key present in both maps to different values."""
        for k in self:
            try:
                if self[k] != other[k]:
                    return False
            except KeyError:
                # A key missing from ``other`` cannot conflict.
                pass

        return True

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenDict:
        """Return a new FrozenDict of both mappings; ``other`` wins on shared keys."""
        res = FrozenDict(itertools.chain(self.items(), other.items()))

        return res

    def __str__(self) -> str:
        return str(self._d)

    def __repr__(self) -> str:
        return repr(self._d)
|
||||
|
||||
|
||||
class FrozenBindings(FrozenDict):
    """A ``FrozenDict`` of bindings that also carries its query context."""

    def __init__(self, ctx: QueryContext, *args, **kwargs):
        """Store ``ctx``; the remaining arguments build the underlying dict."""
        FrozenDict.__init__(self, *args, **kwargs)
        self.ctx = ctx

    def __getitem__(self, key: Union[Identifier, str]) -> Identifier:
        """Resolve ``key`` against these bindings.

        A key that is not a ``Node`` is first turned into a ``Variable``.
        Terms other than blank nodes and variables are returned as they are.
        A blank node or variable not bound here is looked up in the
        context's ``initBindings``.
        """
        term = key if isinstance(key, Node) else Variable(key)

        if not isinstance(term, (BNode, Variable)):
            return term

        if term in self._d:
            return self._d[term]
        # type error: Value of type "Optional[Dict[Variable, Identifier]]" is not indexable
        # type error: Invalid index type "Union[BNode, Variable]" for "Optional[Dict[Variable, Identifier]]"; expected type "Variable"
        return self.ctx.initBindings[term]  # type: ignore[index]

    def project(self, vars: Container[Variable]) -> FrozenBindings:
        """Return new bindings with only the items whose key is in ``vars``."""
        kept = (item for item in self.items() if item[0] in vars)
        return FrozenBindings(self.ctx, kept)

    def merge(self, other: t.Mapping[Identifier, Identifier]) -> FrozenBindings:
        """Return new bindings combining these items with those of ``other``."""
        combined = itertools.chain(self.items(), other.items())
        return FrozenBindings(self.ctx, combined)

    @property
    def now(self) -> datetime.datetime:
        return self.ctx.now

    @property
    def bnodes(self) -> t.Mapping[Identifier, BNode]:
        return self.ctx.bnodes

    @property
    def prologue(self) -> Optional[Prologue]:
        return self.ctx.prologue

    def forget(
        self, before: QueryContext, _except: Optional[Container[Variable]] = None
    ) -> FrozenBindings:
        """
        return a frozen dict only of bindings made in self
        since before

        Items whose key is in ``_except`` or in the context's
        ``initBindings`` are always kept.
        """
        always_kept = _except or []

        def is_kept(var) -> bool:
            # bindings from initBindings are never forgotten
            return (
                var in always_kept
                # type error: Unsupported right operand type for in ("Optional[Dict[Variable, Identifier]]")
                or var in self.ctx.initBindings  # type: ignore[operator]
                or before[var] is None
            )

        return FrozenBindings(
            self.ctx,
            ((var, value) for var, value in self.items() if is_kept(var)),
        )

    def remember(self, these) -> FrozenBindings:
        """
        return a frozen dict only of bindings in these
        """
        kept = (item for item in self.items() if item[0] in these)
        return FrozenBindings(self.ctx, kept)
|
||||
|
||||
|
||||
class QueryContext:
    """
    Query context - passed along when evaluating the query

    Holds the current variable bindings, the active graph, the dataset
    (when the context was created from a ``Dataset`` /
    ``ConjunctiveGraph``), the prologue and the shared blank-node map.
    """

    def __init__(
        self,
        graph: Optional[Graph] = None,
        bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
        initBindings: Optional[Mapping[str, Identifier]] = None,
        datasetClause=None,
    ):
        self.initBindings = initBindings
        self.bindings = Bindings(d=bindings or [])
        if initBindings:
            self.bindings.update(initBindings)

        self.graph: Optional[Graph]
        self._dataset: Optional[Union[Dataset, ConjunctiveGraph]]
        if isinstance(graph, (Dataset, ConjunctiveGraph)):
            if datasetClause:
                # FROM / FROM NAMED: build a fresh dataset and default
                # graph, filled from the given graph or loaded on demand.
                self._dataset = Dataset()
                self.graph = Graph()
                for d in datasetClause:
                    if d.default:
                        from_graph = graph.get_context(d.default)
                        self.graph += from_graph
                        if not from_graph:
                            self.load(d.default, default=True)
                    elif d.named:
                        namedGraphs = Graph(
                            store=self.dataset.store, identifier=d.named
                        )
                        from_named_graphs = graph.get_context(d.named)
                        namedGraphs += from_named_graphs
                        if not from_named_graphs:
                            self.load(d.named, default=False)
            else:
                self._dataset = graph
                if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
                    self.graph = self.dataset
                else:
                    self.graph = self.dataset.default_context
        else:
            self._dataset = None
            self.graph = graph

        self.prologue: Optional[Prologue] = None
        self._now: Optional[datetime.datetime] = None

        self.bnodes: t.MutableMapping[Identifier, BNode] = collections.defaultdict(
            BNode
        )

    @property
    def now(self) -> datetime.datetime:
        """The current UTC time, computed on first access and then reused."""
        if self._now is None:
            self._now = datetime.datetime.now(datetime.timezone.utc)
        return self._now

    def clone(
        self, bindings: Optional[Union[FrozenBindings, Bindings, List[Any]]] = None
    ) -> QueryContext:
        """
        Return a new context sharing this one's graph, prologue and
        blank-node map.

        NOTE(review): because of ``bindings or self.bindings`` any falsy
        ``bindings`` value - including the empty list passed by
        :meth:`clean` - falls back to the current bindings. Confirm this
        is intended before changing it.
        """
        r = QueryContext(
            self._dataset if self._dataset is not None else self.graph,
            bindings or self.bindings,
            initBindings=self.initBindings,
        )
        r.prologue = self.prologue
        r.graph = self.graph
        r.bnodes = self.bnodes
        return r

    @property
    def dataset(self) -> ConjunctiveGraph:
        """current dataset"""
        if self._dataset is None:
            raise Exception(
                "You performed a query operation requiring "
                + "a dataset (i.e. ConjunctiveGraph), but "
                + "operating currently on a single graph."
            )
        return self._dataset

    def load(
        self,
        source: URIRef,
        default: bool = False,
        into: Optional[Identifier] = None,
        **kwargs: Any,
    ) -> None:
        """
        Load data from the source into the query context's dataset.

        :param source: The source to load from.
        :param default: If `True`, triples from the source will be added
            to the default graph, otherwise it will be loaded into a
            graph with ``source`` URI as its name.
        :param into: The name of the graph to load the data into. If
            `None`, the source URI will be used as the name of the
            graph.
        :param kwargs: Keyword arguments to pass to
            :meth:`rdflib.graph.Graph.parse`.
        """

        def _load(graph, source):
            # Try each serialisation in turn; the first parser that
            # succeeds wins. Turtle is attempted first even though the
            # error message below does not mention it.
            last_error = None
            for fmt in ("turtle", "xml", "n3", "nt"):
                try:
                    return graph.parse(source, format=fmt, **kwargs)
                except Exception as err:
                    last_error = err
            raise Exception(
                "Could not load %s as either RDF/XML, N3 or NTriples" % source
            ) from last_error

        if not rdflib.plugins.sparql.SPARQL_LOAD_GRAPHS:
            # we are not loading - if we already know the graph
            # being "loaded", just add it to the default-graph
            if default:
                # Unsupported left operand type for + ("None")
                self.graph += self.dataset.get_context(source)  # type: ignore[operator]
        else:
            if default:
                _load(self.graph, source)
            else:
                if into is None:
                    into = source
                _load(self.dataset.get_context(into), source)

    def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
        """
        Return the binding for ``key``, or ``None`` when it is unbound.
        Keys that are neither ``BNode`` nor ``Variable`` are returned as-is.
        """
        # in SPARQL BNodes are just labels
        if not isinstance(key, (BNode, Variable)):
            return key
        try:
            return self.bindings[key]
        except KeyError:
            return None

    def get(self, key: str, default: Optional[Any] = None) -> Any:
        """
        Return the binding for ``key``, or ``default`` when it is unbound.
        """
        # __getitem__ reports an unbound variable as None rather than
        # raising KeyError, so catching KeyError here could never yield
        # ``default``.
        value = self[key]
        return default if value is None else value

    def solution(self, vars: Optional[Iterable[Variable]] = None) -> FrozenBindings:
        """
        Return a static copy of the current variable bindings as dict
        """
        if vars:
            return FrozenBindings(
                self, ((k, v) for k, v in self.bindings.items() if k in vars)
            )
        else:
            return FrozenBindings(self, self.bindings.items())

    def __setitem__(self, key: str, value: str) -> None:
        """
        Bind ``key`` to ``value``.

        :raises AlreadyBound: if ``key`` is already bound to a different value.
        """
        if key in self.bindings and self.bindings[key] != value:
            raise AlreadyBound()

        self.bindings[key] = value

    def pushGraph(self, graph: Optional[Graph]) -> QueryContext:
        """Return a clone of this context with ``graph`` as its active graph."""
        r = self.clone()
        r.graph = graph
        return r

    def push(self) -> QueryContext:
        """Return a clone whose bindings are a new ``Bindings`` created from the current ones."""
        r = self.clone(Bindings(self.bindings))
        return r

    def clean(self) -> QueryContext:
        """Return ``self.clone([])`` (see the review note on :meth:`clone`)."""
        return self.clone([])

    def thaw(self, frozenbindings: FrozenBindings) -> QueryContext:
        """
        Create a new read/write query context from the given solution
        """
        c = self.clone(frozenbindings)

        return c
|
||||
|
||||
|
||||
class Prologue:
    """
    Holder for the prefix bindings and the base URI of a query or update
    """

    def __init__(self) -> None:
        # ns man needs a store
        self.namespace_manager = NamespaceManager(Graph())
        self.base: Optional[str] = None

    def resolvePName(self, prefix: Optional[str], localname: Optional[str]) -> URIRef:
        """
        Expand ``prefix:localname`` into a ``URIRef``.

        A missing prefix or local name is treated as the empty string.
        Raises ``Exception`` when the prefix is not bound.
        """
        namespace = self.namespace_manager.store.namespace(prefix or "")
        if namespace is None:
            raise Exception("Unknown namespace prefix : %s" % prefix)
        local = localname or ""
        return URIRef(namespace + local)

    def bind(self, prefix: Optional[str], uri: Any) -> None:
        """Bind ``prefix`` to ``uri``, replacing any existing binding."""
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(
        self, iri: Optional[Union[CompValue, str]]
    ) -> Optional[Union[CompValue, str]]:
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        Anything that is not a ``pname`` / ``literal`` ``CompValue`` or a
        colon-free ``URIRef`` is handed back untouched.

        TODO: Move resolving URIs to pre-processing
        """
        if isinstance(iri, CompValue):
            kind = iri.name
            if kind == "pname":
                return self.resolvePName(iri.prefix, iri.localname)
            if kind == "literal":
                # type error: Argument "datatype" to "Literal" has incompatible type "Union[CompValue, Identifier, None]"; expected "Optional[str]"
                datatype = self.absolutize(iri.datatype)
                return Literal(iri.string, lang=iri.lang, datatype=datatype)  # type: ignore[arg-type]
            return iri

        if isinstance(iri, URIRef) and ":" not in iri:
            # no colon in the IRI: resolve it against the base
            return URIRef(iri, base=self.base)

        return iri
|
||||
|
||||
|
||||
class Query:
    """
    A query after parsing and translation: its prologue plus its algebra
    """

    def __init__(self, prologue: Prologue, algebra: CompValue):
        # declared only; no value is assigned here
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
        self.prologue, self.algebra = prologue, algebra
|
||||
|
||||
|
||||
class Update:
    """
    An update after parsing and translation: its prologue plus the list
    of algebra operations
    """

    def __init__(self, prologue: Prologue, algebra: List[CompValue]):
        # declared only; no value is assigned here
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]
        self.prologue, self.algebra = prologue, algebra
|
||||
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
|
||||
Code for carrying out Update Operations
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator, Mapping, Optional, Sequence
|
||||
|
||||
from rdflib.graph import Graph
|
||||
from rdflib.plugins.sparql.evaluate import evalBGP, evalPart
|
||||
from rdflib.plugins.sparql.evalutils import _fillTemplate, _join
|
||||
from rdflib.plugins.sparql.parserutils import CompValue
|
||||
from rdflib.plugins.sparql.sparql import FrozenDict, QueryContext, Update
|
||||
from rdflib.term import Identifier, URIRef, Variable
|
||||
|
||||
|
||||
def _graphOrDefault(ctx: QueryContext, g: str) -> Optional[Graph]:
    """
    Return the context's active graph for ``"DEFAULT"``, otherwise the
    dataset graph named ``g``.
    """
    return ctx.graph if g == "DEFAULT" else ctx.dataset.get_context(g)
|
||||
|
||||
|
||||
def _graphAll(ctx: QueryContext, g: str) -> Sequence[Graph]:
    """
    Resolve a graph reference to a list of graphs.

    ``"DEFAULT"`` gives the active graph, ``"NAMED"`` every dataset
    context whose identifier differs from the active graph's, ``"ALL"``
    every dataset context, and anything else the single context of that
    name.
    """
    if g == "DEFAULT":
        # type error: List item 0 has incompatible type "Optional[Graph]"; expected "Graph"
        return [ctx.graph]  # type: ignore[list-item]

    if g == "ALL":
        return list(ctx.dataset.contexts())

    if g == "NAMED":
        named = []
        for candidate in ctx.dataset.contexts():
            # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
            if candidate.identifier != ctx.graph.identifier:  # type: ignore[union-attr]
                named.append(candidate)
        return named

    return [ctx.dataset.get_context(g)]
|
||||
|
||||
|
||||
def evalLoad(ctx: QueryContext, u: CompValue) -> None:
    """
    LOAD: load ``u.iri`` into the graph ``u.graphiri`` when one is
    given, otherwise into the default graph.

    http://www.w3.org/TR/sparql11-update/#load
    """
    if TYPE_CHECKING:
        assert isinstance(u.iri, URIRef)

    if not u.graphiri:
        ctx.load(u.iri, default=True)
        return

    ctx.load(u.iri, default=False, into=u.graphiri)
|
||||
|
||||
|
||||
def evalCreate(ctx: QueryContext, u: CompValue) -> None:
    """
    CREATE: always raises - either because the target graph already
    holds triples or because creation is not implemented.

    http://www.w3.org/TR/sparql11-update/#create
    """
    target = ctx.dataset.get_context(u.graphiri)
    if len(target) > 0:
        message = "Graph %s already exists." % target.identifier
    else:
        message = "Create not implemented!"
    raise Exception(message)
|
||||
|
||||
|
||||
def evalClear(ctx: QueryContext, u: CompValue) -> None:
    """
    CLEAR: remove every triple from each graph selected by ``u.graphiri``.

    http://www.w3.org/TR/sparql11-update/#clear
    """
    everything = (None, None, None)
    for graph in _graphAll(ctx, u.graphiri):
        graph.remove(everything)
|
||||
|
||||
|
||||
def evalDrop(ctx: QueryContext, u: CompValue) -> None:
    """
    DROP: remove the selected graphs from a graph-aware store; on any
    other store fall back to clearing them.

    http://www.w3.org/TR/sparql11-update/#drop
    """
    if not ctx.dataset.store.graph_aware:
        evalClear(ctx, u)
        return

    for graph in _graphAll(ctx, u.graphiri):
        ctx.dataset.store.remove_graph(graph)
|
||||
|
||||
|
||||
def evalInsertData(ctx: QueryContext, u: CompValue) -> None:
    """
    INSERT DATA: add ``u.triples`` to the active graph and each entry of
    ``u.quads`` to the graph it names.

    http://www.w3.org/TR/sparql11-update/#insertData
    """
    # plain triples go to the active graph
    default_graph = ctx.graph
    default_graph += u.triples

    # u.quads is a dict of graphURI=>[triples]
    for graph_name in u.quads:
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]"
        named_graph = ctx.dataset.get_context(graph_name)  # type: ignore[arg-type]
        named_graph += u.quads[graph_name]
|
||||
|
||||
|
||||
def evalDeleteData(ctx: QueryContext, u: CompValue) -> None:
    """
    DELETE DATA: remove ``u.triples`` from the active graph and each
    entry of ``u.quads`` from the graph it names.

    http://www.w3.org/TR/sparql11-update/#deleteData
    """
    # plain triples come out of the active graph
    default_graph = ctx.graph
    default_graph -= u.triples

    # u.quads is a dict of graphURI=>[triples]
    for graph_name in u.quads:
        # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]"
        named_graph = ctx.dataset.get_context(graph_name)  # type: ignore[arg-type]
        named_graph -= u.quads[graph_name]
|
||||
|
||||
|
||||
def evalDeleteWhere(ctx: QueryContext, u: CompValue) -> None:
    """
    DELETE WHERE: match ``u.triples`` / ``u.quads`` as patterns and, for
    every solution found, remove the instantiated templates.

    http://www.w3.org/TR/sparql11-update/#deleteWhere
    """
    # Solutions of the default-graph pattern, joined with the solutions
    # of each quad pattern evaluated against its own graph.
    solutions: Iterator[FrozenDict] = evalBGP(ctx, u.triples)
    for graph_term in u.quads:
        pattern_graph = ctx.dataset.get_context(graph_term)
        graph_ctx = ctx.pushGraph(pattern_graph)
        solutions = _join(solutions, list(evalBGP(graph_ctx, u.quads[graph_term])))

    for solution in solutions:
        default_graph = ctx.graph
        default_graph -= _fillTemplate(u.triples, solution)

        for graph_term in u.quads:
            # look the graph term up in the solution before fetching the graph
            target = ctx.dataset.get_context(solution.get(graph_term))
            target -= _fillTemplate(u.quads[graph_term], solution)
|
||||
|
||||
|
||||
def evalModify(ctx: QueryContext, u: CompValue) -> None:
    """
    Evaluate the WHERE clause of a modify operation and, for each
    solution, apply the delete templates followed by the insert
    templates.
    """
    base_ctx = ctx
    replaced_default = False

    dg: Optional[Graph]
    if u.using:
        # Using replaces the dataset for evaluating the where-clause
        for clause in u.using:
            if clause.default:
                if not replaced_default:
                    # the first USING swaps in a fresh default graph
                    dg = Graph()
                    ctx = ctx.pushGraph(dg)
                    replaced_default = True

                ctx.load(clause.default, default=True)

            elif clause.named:
                ctx.load(clause.named, default=False)

    elif u.withClause:
        # "The WITH clause provides a convenience for when an operation
        # primarily refers to a single graph. If a graph name is specified
        # in a WITH clause, then - for the purposes of evaluating the
        # WHERE clause - this will define an RDF Dataset containing a
        # default graph with the specified name, but only in the absence
        # of USING or USING NAMED clauses. In the presence of one or more
        # graphs referred to in USING clauses and/or USING NAMED clauses,
        # the WITH clause will be ignored while evaluating the WHERE
        # clause."
        with_graph = ctx.dataset.get_context(u.withClause)
        ctx = ctx.pushGraph(with_graph)

    solutions = evalPart(ctx, u.where)

    if u.using:
        if replaced_default:
            ctx = base_ctx  # restore original default graph
        if u.withClause:
            # WITH was ignored for the WHERE clause; it applies to the templates
            with_graph = ctx.dataset.get_context(u.withClause)
            ctx = ctx.pushGraph(with_graph)

    for solution in solutions:
        dg = ctx.graph
        if u.delete:
            # type error: Unsupported left operand type for - ("None")
            # type error: Unsupported operand types for - ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]")
            dg -= _fillTemplate(u.delete.triples, solution)  # type: ignore[operator]

            for graph_term, template in u.delete.quads.items():
                target = ctx.dataset.get_context(solution.get(graph_term))
                target -= _fillTemplate(template, solution)

        if u.insert:
            # type error: Unsupported left operand type for + ("None")
            # type error: Unsupported operand types for + ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]")
            dg += _fillTemplate(u.insert.triples, solution)  # type: ignore[operator]

            for graph_term, template in u.insert.quads.items():
                target = ctx.dataset.get_context(solution.get(graph_term))
                target += _fillTemplate(template, solution)
|
||||
|
||||
|
||||
def evalAdd(ctx: QueryContext, u: CompValue) -> None:
    """
    ADD: copy every triple of the source graph into the destination
    graph. Nothing happens when both refer to the same graph.

    http://www.w3.org/TR/sparql11-update/#add
    """
    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier != target.identifier:  # type: ignore[union-attr]
        # type error: Unsupported left operand type for + ("None")
        target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalMove(ctx: QueryContext, u: CompValue) -> None:
    """
    MOVE: empty the destination graph, copy the source graph into it,
    then remove the source (dropping it on a graph-aware store,
    emptying it otherwise). Nothing happens when both refer to the
    same graph.

    http://www.w3.org/TR/sparql11-update/#move
    """
    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
    target.remove((None, None, None))  # type: ignore[union-attr]

    # type error: Unsupported left operand type for + ("None")
    target += source  # type: ignore[operator]

    if not ctx.dataset.store.graph_aware:
        # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
        source.remove((None, None, None))  # type: ignore[union-attr]
    else:
        # type error: Argument 1 to "remove_graph" of "Store" has incompatible type "Optional[Graph]"; expected "Graph"
        ctx.dataset.store.remove_graph(source)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def evalCopy(ctx: QueryContext, u: CompValue) -> None:
    """
    COPY: empty the destination graph, then copy every triple of the
    source graph into it. Nothing happens when both refer to the same
    graph.

    http://www.w3.org/TR/sparql11-update/#copy
    """
    source_ref, target_ref = u.graph

    source = _graphOrDefault(ctx, source_ref)
    target = _graphOrDefault(ctx, target_ref)

    # type error: Item "None" of "Optional[Graph]" has no attribute "identifier"
    if source.identifier == target.identifier:  # type: ignore[union-attr]
        return

    # type error: Item "None" of "Optional[Graph]" has no attribute "remove"
    target.remove((None, None, None))  # type: ignore[union-attr]

    # type error: Unsupported left operand type for + ("None")
    target += source  # type: ignore[operator]
|
||||
|
||||
|
||||
def evalUpdate(
    graph: Graph,
    update: Update,
    initBindings: Optional[Mapping[str, Identifier]] = None,
) -> None:
    """

    http://www.w3.org/TR/sparql11-update/#updateLanguage

    'A request is a sequence of operations [...] Implementations MUST
    ensure that operations of a single request are executed in a
    fashion that guarantees the same effects as executing them in
    lexical order.

    Operations all result either in success or failure.

    If multiple operations are present in a single request, then a
    result of failure from any operation MUST abort the sequence of
    operations, causing the subsequent operations to be ignored.'

    This will return None on success and raise Exceptions on error

    :param graph: The graph or dataset the operations are applied to.
    :param update: The parsed update whose ``algebra`` operations are
        executed in order.
    :param initBindings: Optional initial bindings; keys are converted
        to :class:`Variable` before use.
    :raises Exception: whatever a failing operation raises, unless that
        operation is marked ``silent``.

    .. caution::

        This method can access indirectly requested network endpoints, for
        example, query processing will attempt to access network endpoints
        specified in ``SERVICE`` directives.

        When processing untrusted or potentially malicious queries, measures
        should be taken to restrict network and file access.

        For information on available security measures, see the RDFLib
        :doc:`Security Considerations </security_considerations>`
        documentation.

    """
    handlers = {
        "Load": evalLoad,
        "Clear": evalClear,
        "Drop": evalDrop,
        "Create": evalCreate,
        "Add": evalAdd,
        "Move": evalMove,
        "Copy": evalCopy,
        "InsertData": evalInsertData,
        "DeleteData": evalDeleteData,
        "DeleteWhere": evalDeleteWhere,
        "Modify": evalModify,
    }

    # The conversion does not depend on the operation, so do it once
    # instead of once per operation.
    bindings = {Variable(k): v for k, v in (initBindings or {}).items()}

    for u in update.algebra:
        ctx = QueryContext(graph, initBindings=bindings)
        ctx.prologue = u.prologue

        try:
            handler = handlers.get(u.name)
            if handler is None:
                raise Exception("Unknown update operation: %s" % (u,))
            handler(ctx, u)
        except Exception:
            # SILENT suppresses failures of the operation itself. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate even for silent operations.
            if not u.silent:
                raise
|
||||
Reference in New Issue
Block a user