"""The registry stores all local CUDS objects."""
import logging
from uuid import UUID
from rdflib import URIRef
import osp.core.warnings as warning_settings
# Module-level logger; Registry.prune() uses it to emit its deprecation notice.
logger = logging.getLogger(__name__)
class Registry(dict):
    """A dictionary that contains all local CUDS objects.

    Objects are stored under their ``uid``. Item access through square
    brackets is disabled on purpose: use `put()` to add an object and
    `get()` to retrieve one.
    """

    # TODO let the registry act on the graph only.
    #  Don't maintain this separate dict.

    def __setitem__(self, key, value):
        """Enforce the use of put().

        Raises:
            TypeError: Always.
        """
        raise TypeError("Operation not supported.")

    def __getitem__(self, key):
        """Enforce the use of get().

        Raises:
            TypeError: Always.
        """
        raise TypeError("Operation not supported.")

    def put(self, cuds_object):
        """Add an object to the registry.

        The object is stored under ``cuds_object.uid``; an entry that
        already exists under that uid is overwritten.

        Args:
            cuds_object (Cuds): The cuds_object to put in the registry.

        Raises:
            ValueError: Unsupported object provided (not a Cuds object).
        """
        # NOTE(review): imported inside the method rather than at module
        # level, presumably to avoid a circular import - confirm.
        from osp.core.cuds import Cuds

        if not isinstance(cuds_object, Cuds):
            raise ValueError("{!r} is not a cuds".format(cuds_object))
        # Bypass the disabled __setitem__ of this class.
        super().__setitem__(cuds_object.uid, cuds_object)

    def get(self, uid):
        """Return the object corresponding to a given uid.

        Args:
            uid (Union[UUID, URIRef]): The uid of the desired
                object.

        Raises:
            ValueError: Unsupported key provided (not a uid object).
            KeyError: No object with the given uid is in the registry.

        Returns:
            Cuds: Cuds object with the uid.
        """
        if not isinstance(uid, (UUID, URIRef)):
            raise ValueError("{!r} is not a proper uid".format(uid))
        # Bypass the disabled __getitem__ of this class.
        return super().__getitem__(uid)

    def get_subtree(
        self, root, subtree=None, rel=None, skip=None, warning=None
    ):
        """Get all the elements in the subtree rooted at given root.

        Only use the given relationship for traversal. The traversal is
        recursive: the method calls itself once for every neighbor that
        has not been visited yet, so the recursion depth grows with the
        length of the longest path that is followed.

        Args:
            root (Union[UUID, URIRef, Cuds]): The root of the subtree.
            subtree (Set[Cuds], optional): Currently calculated subtree
                (this is a recursive algorithm). Defaults to None.
            rel (Relationship, optional): The relationship used for
                traversal. When None (or when `rel.subclasses` is empty),
                all relationships are followed. Defaults to None.
            skip (Set[Cuds], optional): The elements to skip. A non-empty
                set is updated in place with the visited elements.
                Defaults to None.
            warning (LargeDatasetWarning, optional): Raise a
                `LargeDatasetWarning` when the subtree is large. When
                `None`, no warning is raised. If you wish to raise the
                warning, a `LargeDatasetWarning` object must be provided.

        Returns:
            Set[Cuds]: The set of elements in the subtree rooted in the
                given uid.
        """
        if isinstance(root, (UUID, URIRef)):
            root = super().__getitem__(root)
        assert root.uid in self

        # A missing or empty `skip` is replaced by a fresh set; a non-empty
        # one is updated in place, so the recursive calls below all share
        # the same record of visited elements.
        if not skip:
            skip = set()
        skip |= {root}
        subtree = subtree or {root}

        # Decide whether relationship `r` should be followed.
        # - When no `rel` is provided, every relationship is followed.
        # - When `rel` is provided, only relationships contained in
        #   `rel.subclasses` are followed.
        subclasses = set() if rel is None else rel.subclasses
        if subclasses:

            def subclass_check(r):
                return r in subclasses

        else:

            def subclass_check(r):
                return True

        # Load neighbors connected through the relationship
        neighbor_ids = (
            neighbor
            for relationship, targets in root._neighbors.items()
            if subclass_check(relationship)
            for neighbor in targets
        )
        neighbors = set(root.session.load(*neighbor_ids))
        subtree |= neighbors

        # Optional: raise a `LargeDatasetWarning` if the subtree is too large.
        threshold = (
            warning_settings.unreachable_cuds_objects_large_dataset_size
        )
        if warning is not None and len(subtree) > threshold:
            warning.warn()
            # Do not hand the warning to the recursive calls made below.
            warning = None

        for neighbor in neighbors:
            # `skip` grows while the recursion runs, so it has to be checked
            # right before descending rather than filtered up front.
            if neighbor in skip:
                continue
            self.get_subtree(
                neighbor, subtree=subtree, rel=rel, skip=skip, warning=warning
            )
        return subtree

    def prune(self, *roots, rel=None):
        """Remove all elements in the registry that are not reachable.

        Deprecated: a warning is logged on every call, use Session.prune()
        instead.

        Args:
            *roots (Union[UUID, URIRef, Cuds]): The elements from which
                the reachable elements are determined.
            rel (Relationship, optional): Only consider this relationship.
                Defaults to None.

        Returns:
            List[Cuds]: The set of removed elements.
        """
        logger.warning(
            "Registry.prune() is deprecated. Use Session.prune() instead."
        )
        not_reachable = self._get_not_reachable(*roots, rel=rel)
        for cuds_object in not_reachable:
            super().__delitem__(cuds_object.uid)
        return not_reachable

    def _get_not_reachable(
        self, *roots, rel=None, return_reachable=False, warning=None
    ):
        """Get all elements in the registry that are not reachable.

        Use the given rel for traversal.

        Args:
            *roots (Union[UUID, URIRef, Cuds]): Get all elements not
                reachable from these root elements.
            rel (Relationship, optional): Only use this relationship for
                traversal. Defaults to None.
            return_reachable (bool): Returns also the uids of the reachable
                cuds.
            warning (LargeDatasetWarning, optional): Passed on unchanged to
                `get_subtree`. Defaults to None.

        Returns:
            Union[List[Cuds],
                  Tuple[List[Cuds], Set[Union[UUID, URIRef]]]]: Either a
                list of the unreachable CUDS when `return_reachable` is
                False or a tuple whose first element is such list, and
                second element a set with the uids of the reachable cuds.
        """
        # Get all reachable Cuds objects
        reachable = set()
        for root in roots:
            reachable |= self.get_subtree(
                root, rel=rel, skip=reachable, warning=warning
            )
        reachable_uids = {cuds_object.uid for cuds_object in reachable}

        # Get all the Cuds objects that are not reachable. The lookup is
        # bound outside the comprehension because zero-argument super() is
        # not usable inside one on older Python versions.
        lookup = super().__getitem__
        unreachable = [
            lookup(uid) for uid in self if uid not in reachable_uids
        ]
        if return_reachable:
            return unreachable, reachable_uids
        return unreachable

    def reset(self):
        """Delete the contents of the registry."""
        # Iterate over a snapshot: the registry shrinks during the loop.
        for key in list(self):
            del self[key]

    def filter(self, criterion):
        """Filter the registry.

        Return a dictionary that is a subset of the registry. It contains
        only cuds objects that satisfy the given criterion.

        Args:
            criterion (Callable[Cuds, bool]): A function that decides
                whether a cuds object should be returned. If the function
                returns True on a cuds object it means the cuds object
                satisfies the criterion.

        Returns:
            Dict[Union[UUID, URIRef], Cuds]: dict contains the cuds objects
                satisfying the criterion.
        """
        entries = super().items()
        return {
            uid: cuds_object
            for uid, cuds_object in entries
            if criterion(cuds_object)
        }

    def filter_by_oclass(self, oclass):
        """Filter the registry by ontology class.

        Args:
            oclass (OntologyClass): The oclass used for filtering.

        Returns:
            Dict[Union[UUID, URIRef], Cuds]: A subset of the registry,
                containing cuds objects with given ontology class.
        """
        return self.filter(lambda x: x.oclass == oclass)

    def filter_by_attribute(self, attribute, value):
        """Filter by attribute and value.

        Objects that do not have the attribute at all are left out.

        Args:
            attribute (str): The attribute to look for.
            value (Any): The corresponding value to look for.

        Returns:
            Dict[Union[UUID, URIRef], Cuds]: A subset of the registry,
                containing cuds objects with given attribute and value.
        """
        return self.filter(
            lambda x: hasattr(x, attribute) and getattr(x, attribute) == value
        )

    def filter_by_relationships(
        self, relationship, consider_subrelationships=False
    ):
        """Filter the registry by relationships.

        Return cuds objects containing the given relationship.

        Args:
            relationship (OntologyRelationship): The relationship to filter
                by.
            consider_subrelationships (bool, optional): Whether to return
                CUDS objects containing subrelationships of the given
                relationship. Defaults to False.

        Returns:
            Dict[Union[UUID, URIRef], Cuds]: A subset of the registry,
                containing cuds objects with given relationship.
        """
        if consider_subrelationships:

            def criterion(cuds_object):
                return any(
                    relationship.is_superclass_of(rel)
                    for rel in cuds_object._neighbors.keys()
                )

        else:

            def criterion(cuds_object):
                return relationship in cuds_object._neighbors

        return self.filter(criterion)