Source code for pronto.entity

import datetime
import operator
import typing
import weakref
from typing import AbstractSet, Any, Dict, FrozenSet, Iterable, Iterator, Optional, Set

from ..definition import Definition
from ..pv import PropertyValue
from ..synonym import Synonym, SynonymData, SynonymType
from ..utils.meta import roundrepr, typechecked
from ..xref import Xref

if typing.TYPE_CHECKING:
    from ..ontology import _DataGraph, Ontology
    from ..relationship import Relationship, RelationshipSet
    from .attributes import Relationships

# Public names exported by this module.
__all__ = ["EntityData", "Entity", "EntitySet"]
# Type variables used to parameterise the generic classes below. Each one is
# bound, by forward reference, to one of the classes listed in `__all__`:
# `_D` to the raw data record, `_E` to the entity view and `_S` to the
# entity collection.
_D = typing.TypeVar("_D", bound="EntityData")
_E = typing.TypeVar("_E", bound="Entity")
_S = typing.TypeVar("_S", bound="EntitySet")


class EntityData:
    """Declaration of the fields stored for every entity.

    The class only declares attribute names and their types; it defines no
    constructor and no behaviour of its own. The declared fields are turned
    into ``__slots__`` at the end of the class body, so instances carry no
    per-instance ``__dict__``.
    """

    id: str
    alternate_ids: Set[str]
    annotations: Set[PropertyValue]
    anonymous: bool
    builtin: bool
    comment: Optional[str]
    consider: Set[str]
    created_by: Optional[str]
    creation_date: Optional[datetime.datetime]
    disjoint_from: Set[str]
    definition: Optional[Definition]
    equivalent_to: Set[str]
    name: Optional[str]
    namespace: Optional[str]
    obsolete: bool
    relationships: Dict[str, Set[str]]
    replaced_by: Set[str]
    subsets: Set[str]
    synonyms: Set[SynonymData]
    union_of: Set[str]
    xrefs: Set[Xref]

    if typing.TYPE_CHECKING:
        # Only seen by static type checkers: tells them that the name
        # `__annotations__` used below exists inside the class body.
        __annotations__: Dict[str, str]

    # One slot per annotated field, in declaration order, preceded by
    # `__weakref__` so that instances can be the target of `weakref.ref`
    # (which is how `Entity.__init__` stores them).
    __slots__ = ("__weakref__", *__annotations__)  # noqa: E0602


class Entity(typing.Generic[_D, _S]):
    """An entity in the ontology graph.

    With respect to the OBO semantics, an `Entity` is either a term or a
    relationship in the ontology graph. Any entity has a unique identifier as
    well as some common properties.

    An `Entity` does not own its data: it keeps a weak reference to an
    `EntityData` record and a reference to the `Ontology` it was created
    with, and every property reads from or writes to that record.
    """

    if __debug__ or typing.TYPE_CHECKING:

        __data: "weakref.ReferenceType[_D]"
        __slots__: Iterable[str] = ()

        def __init__(self, ontology: "Ontology", data: "_D"):
            """Create a view over ``data``, which is only weakly referenced."""
            self.__data = weakref.ref(data)
            self.__id = data.id
            self.__ontology = ontology

        def _data(self) -> "EntityData":
            """Return the underlying data record.

            Raises:
                RuntimeError: when the weakly referenced record has already
                    been deallocated.

            """
            rdata = self.__data()
            if rdata is None:
                raise RuntimeError("internal data was deallocated")
            return rdata

    else:

        __slots__: Iterable[str] = ("_data",)  # type: ignore

        def __init__(self, ontology: "Ontology", data: "_D"):
            """Create a view over ``data``, which is only weakly referenced."""
            # Without `__debug__`, `_data` is the weak reference itself:
            # calling it skips the liveness check of the debug variant.
            self._data = weakref.ref(data)  # type: ignore
            self.__ontology = ontology
            self.__id = data.id

    # Placeholders that are left as `NotImplemented` in this class.
    _Set: typing.ClassVar[typing.Type[_S]] = NotImplemented
    _data_getter: typing.Callable[["Ontology"], "_DataGraph"] = NotImplemented

    # --- Private helpers ----------------------------------------------------

    def _ontology(self) -> "Ontology":
        """Return the ontology this entity was created with."""
        return self.__ontology

    # --- Magic Methods ------------------------------------------------------

    def __eq__(self, other: Any) -> bool:
        """Compare two entities by identifier; non-entities are never equal."""
        if isinstance(other, Entity):
            return self.id == other.id
        return False

    def __lt__(self, other):
        """Order entities by identifier."""
        if isinstance(other, Entity):
            return self.id < other.id
        return NotImplemented

    def __le__(self, other):
        """Order entities by identifier."""
        if isinstance(other, Entity):
            return self.id <= other.id
        return NotImplemented

    def __gt__(self, other):
        """Order entities by identifier."""
        if isinstance(other, Entity):
            return self.id > other.id
        return NotImplemented

    def __ge__(self, other):
        """Order entities by identifier."""
        if isinstance(other, Entity):
            return self.id >= other.id
        return NotImplemented

    def __hash__(self):
        """Hash the entity by identifier, consistently with `__eq__`."""
        return hash(self.id)

    def __repr__(self):
        """Build a representation from the class name, the id and the name."""
        return roundrepr.make(type(self).__name__, self.id, name=(self.name, None))

    # --- Data descriptors ---------------------------------------------------

    @property
    def alternate_ids(self) -> Set[str]:
        """`set` of `str`: A set of alternate IDs for this entity."""
        return self._data().alternate_ids

    @alternate_ids.setter  # type: ignore
    def alternate_ids(self, ids: Iterable[str]):
        self._data().alternate_ids = set(ids)

    @property
    def annotations(self) -> Set[PropertyValue]:
        """`set` of `PropertyValue`: Annotations relevant to the entity."""
        return self._data().annotations

    @annotations.setter
    def annotations(self, value: Iterable[PropertyValue]) -> None:
        self._data().annotations = set(value)

    @property
    def anonymous(self) -> bool:
        """`bool`: Whether or not the entity has an anonymous id.

        Semantics of anonymous entities are the same as B-Nodes in RDF.
        """
        return self._data().anonymous

    @anonymous.setter
    def anonymous(self, value: bool):
        self._data().anonymous = value

    @property
    def builtin(self) -> bool:
        """`bool`: Whether or not the entity is built-in to the OBO format.

        ``pronto`` uses this tag on the ``is_a`` relationship, which is
        axiomatic to the OBO language but treated as a relationship in the
        library.
        """
        return self._data().builtin

    @builtin.setter  # type: ignore
    @typechecked(property=True)
    def builtin(self, value: bool):
        self._data().builtin = value

    @property
    def comment(self) -> Optional[str]:
        """`str` or `None`: A comment about the current entity.

        Comments in ``comment`` clauses are guaranteed to be conserved by OBO
        parsers and serializers, unlike bang comments. A non `None` `comment`
        is semantically equivalent to a ``rdfs:comment`` in OWL2. When parsing
        from OWL, several RDF comments will be merged together into a single
        ``comment`` clause spanning over multiple lines.
        """
        return self._data().comment

    @comment.setter
    def comment(self, value: Optional[str]):
        self._data().comment = value

    @property
    def consider(self) -> _S:
        """`EntitySet`: A set of potential substitutes for an obsolete term.

        An obsolete entity can provide one or more entities which may be
        appropriate substitutes, but needs to be looked at carefully by a
        human expert before the replacement is done.

        See Also:
            `~Entity.replaced_by`, which provides a set of entities suitable
            for automatic replacement.

        """
        # The returned set shares its identifiers with the entity data, so
        # mutating it mutates the entity.
        s = self._Set()
        s._ids = self._data().consider
        s._ontology = self._ontology()
        return s

    @consider.setter
    def consider(self, consider: Iterable[_E]) -> None:
        if isinstance(consider, EntitySet):
            # Copy: an `EntitySet` obtained from a getter shares `_ids` with
            # the data of the entity it came from; storing that very object
            # would make two entities share one mutable set.
            data = set(consider._ids)
        else:
            data = {entity.id for entity in consider}
        self._data().consider = data

    @property
    def created_by(self) -> Optional[str]:
        """`str` or `None`: The name of the creator of the entity, if any.

        This property gets translated to a ``dc:creator`` annotation in OWL2,
        which has very broad semantics. Some OBO ontologies may instead use
        other annotation properties such as the ones found in `Information
        Interchange Ontology <http://www.obofoundry.org/ontology/iao.html>`_,
        which can be accessed in the `annotations` attribute of the entity,
        if any.
        """
        return self._data().created_by

    @created_by.setter  # type: ignore
    @typechecked(property=True)
    def created_by(self, value: Optional[str]):
        self._data().created_by = value

    @property
    def creation_date(self) -> Optional[datetime.datetime]:
        """`~datetime.datetime` or `None`: The date the entity was created."""
        return self._data().creation_date

    @creation_date.setter  # type: ignore
    @typechecked(property=True)
    def creation_date(self, value: Optional[datetime.datetime]):
        self._data().creation_date = value

    @property
    def definition(self) -> Optional[Definition]:
        """`Definition` or `None`: The definition of the current entity.

        Definitions in OBO are intended to be human-readable text describing
        the entity, with some additional cross-references if possible.

        Example:
            >>> hp = pronto.Ontology.from_obo_library("hp.obo")
            >>> term = hp["HP:0009882"]
            >>> term.name
            'Short distal phalanx of finger'
            >>> str(term.definition)
            'Short distance from the end of the finger to the most distal...'
            >>> sorted(term.definition.xrefs)
            [Xref('HPO:probinson'), Xref('PMID:19125433')]

        """
        return self._data().definition

    @definition.setter  # type: ignore
    @typechecked(property=True)
    def definition(self, definition: Optional[Definition]):
        self._data().definition = definition

    @property
    def disjoint_from(self) -> _S:
        """`EntitySet`: The entities declared as disjoint from this entity.

        Two entities are disjoint if they have no instances in common. Two
        entities that are disjoint cannot share any subentities, but the
        opposite is not always true.
        """
        s = self._Set()
        s._ids = self._data().disjoint_from
        s._ontology = self._ontology()
        return s

    @disjoint_from.setter
    def disjoint_from(self, disjoint: Iterable[_E]):
        if isinstance(disjoint, EntitySet):
            # Copy so the stored set is not shared with the given collection.
            data = set(disjoint._ids)
        else:
            data = {entity.id for entity in disjoint}
        self._data().disjoint_from = data

    @property
    def equivalent_to(self) -> _S:
        """`EntitySet`: The entities declared as equivalent to this entity."""
        s = self._Set()
        s._ids = self._data().equivalent_to
        s._ontology = self._ontology()
        return s

    @equivalent_to.setter
    def equivalent_to(self, entities: Iterable[_E]):
        if isinstance(entities, EntitySet):
            # Copy so the stored set is not shared with the given collection.
            data = set(entities._ids)
        else:
            data = {entity.id for entity in entities}
        self._data().equivalent_to = data

    @property
    def id(self) -> str:
        """`str`: The OBO identifier of the entity.

        Identifiers can be either prefixed (e.g. ``MS:1000031``), unprefixed
        (e.g. ``part_of``) or given as plain URLs. Identifiers cannot be
        edited.
        """
        return self.__id

    @property
    def name(self) -> Optional[str]:
        """`str` or `None`: The name of the entity.

        Names are formally equivalent to ``rdf:label`` in OWL2. The OBO format
        version 1.4 made names optional to improve OWL interoperability, as
        labels are optional in OWL.
        """
        return self._data().name

    @name.setter  # type: ignore
    @typechecked(property=True)
    def name(self, value: Optional[str]):
        self._data().name = value

    @property
    def namespace(self) -> Optional[str]:
        """`str` or `None`: The namespace this entity is defined in."""
        return self._data().namespace

    @namespace.setter  # type: ignore
    @typechecked(property=True)
    def namespace(self, ns: Optional[str]):
        self._data().namespace = ns

    @property
    def obsolete(self) -> bool:
        """`bool`: Whether or not the entity is obsolete.

        Hint:
            All OBO entities can be made obsolete through a boolean flag, and
            map to one or several replacements. When querying an obsolete
            entity, ``pronto`` will **not** attempt to perform any kind of
            replacement itself ::

                >>> ms = pronto.Ontology.from_obo_library("ms.obo")
                >>> term = ms["MS:1001414"]
                >>> term
                Term('MS:1001414', name='MGF scans')
                >>> term.obsolete
                True

            To always get the up-to-date, non-obsolete entity, you could use
            the following snippet, going through a term replacement if there
            is no ambiguity ::

                >>> while term.obsolete:
                ...     if len(term.replaced_by) != 1:
                ...         raise ValueError(f"no replacement for {term.id}")
                ...     term = term.replaced_by.pop()
                >>> term
                Term('MS:1000797', name='peak list scans')

        See Also:
            `~.Entity.consider` and `~Entity.replaced_by`, storing some
            replacement options for an obsolete entity.

        """
        return self._data().obsolete

    @obsolete.setter  # type: ignore
    @typechecked(property=True)
    def obsolete(self, value: bool):
        self._data().obsolete = value

    @property
    def relationships(self: _E) -> "Relationships[_E, _S]":
        """`~.Relationships`: The links from an entity to other entities.

        This property returns an object that maps a `~.Relationship` to
        an `~.EntitySet` (either a `~.TermSet` for `Term.relationships`, or
        a `~.RelationshipSet` for `Relationship.relationships`).

        Hint:
            The mapping is mutable, so relationships can be created or removed
            using the usual interface of a `~collections.abc.MutableMapping`.

        Example:
            Get the ``MS:1000004`` term (*sample mass*) from the Mass
            Spectrometry ontology::

                >>> ms = pronto.Ontology.from_obo_library("ms.obo")
                >>> sample_mass = ms["MS:1000004"]

            Then use the ``relationships`` property to get the relevant
            unit from the Unit Ontology::

                >>> sorted(sample_mass.relationships.keys())
                [Relationship('has_units', name='has_units')]
                >>> sample_mass.relationships[ms.get_relationship('has_units')]
                TermSet({Term('UO:0000021', name='gram')})

        """
        # Imported here rather than at module level (the module-level import
        # of `Relationships` is guarded by `typing.TYPE_CHECKING`).
        from .attributes import Relationships

        return Relationships(self)

    @relationships.setter
    def relationships(self, rels: typing.Mapping["Relationship", Iterable[_E]]):
        self._data().relationships = {
            relation.id: {entity.id for entity in entities}
            for relation, entities in rels.items()
        }

    @property
    def replaced_by(self) -> _S:
        """`EntitySet`: A set of replacements for an obsolete term.

        An obsolete entity can provide one or more replacements that can
        safely be used to automatically reassign instances to non-obsolete
        classes.

        See Also:
            `~Entity.consider`, which provides a set of entities suitable
            for replacement but requiring expert curation.

        """
        s = self._Set()
        s._ids = self._data().replaced_by
        s._ontology = self._ontology()
        return s

    @replaced_by.setter
    def replaced_by(self, replacements: Iterable[_E]) -> None:
        if isinstance(replacements, EntitySet):
            # Copy so the stored set is not shared with the given collection.
            data = set(replacements._ids)
        else:
            data = {entity.id for entity in replacements}
        self._data().replaced_by = data

    @property
    def subsets(self) -> FrozenSet[str]:
        """`frozenset` of `str`: The subsets containing this entity."""
        return frozenset(self._data().subsets)

    @subsets.setter  # type: ignore
    @typechecked(property=True)
    def subsets(self, subsets: FrozenSet[str]):
        # Only subsets declared in the ontology metadata may be assigned.
        declared = {s.name for s in self._ontology().metadata.subsetdefs}
        for subset in subsets:
            if subset not in declared:
                raise ValueError(f"undeclared subset: {subset!r}")
        self._data().subsets = set(subsets)

    @property
    def synonyms(self) -> FrozenSet[Synonym]:
        """`frozenset` of `Synonym`: A set of synonyms for this entity."""
        ontology, termdata = self._ontology(), self._data()
        return frozenset(Synonym(ontology, s) for s in termdata.synonyms)

    @synonyms.setter  # type: ignore
    @typechecked(property=True)
    def synonyms(self, synonyms: FrozenSet[Synonym]):
        self._data().synonyms = {syn._data() for syn in synonyms}

    @property
    def union_of(self) -> _S:
        """`EntitySet`: The entities stored in the ``union_of`` field.

        The setter raises a `TypeError` when given an iterable containing
        something other than an `Entity`, and a `ValueError` when the
        resulting set would contain exactly one identifier.
        """
        s = self._Set()
        s._ids = self._data().union_of
        s._ontology = self._ontology()
        return s

    @union_of.setter
    def union_of(self, union_of: Iterable[_E]) -> None:
        if isinstance(union_of, EntitySet):
            # Copy so the stored set is not shared with the given collection.
            data = set(union_of._ids)
        else:
            data = set()
            for entity in union_of:
                if not isinstance(entity, Entity):
                    ty = type(entity).__name__
                    raise TypeError(f"expected `Entity`, found {ty}")
                data.add(entity.id)
        if len(data) == 1:
            raise ValueError("'union_of' cannot have a cardinality of 1")
        self._data().union_of = data

    @property
    def xrefs(self) -> FrozenSet[Xref]:
        """`frozenset` of `Xref`: A set of database cross-references.

        Xrefs can be used to describe an analogous entity in another
        vocabulary, such as a database or a semantic knowledge base.
        """
        return frozenset(self._data().xrefs)

    @xrefs.setter  # type: ignore
    @typechecked(property=True)
    def xrefs(self, xrefs: FrozenSet[Xref]):
        self._data().xrefs = set(xrefs)

    # --- Convenience methods ------------------------------------------------

    def add_synonym(
        self,
        description: str,
        scope: Optional[str] = None,
        type: Optional[SynonymType] = None,
        xrefs: Optional[Iterable[Xref]] = None,
    ) -> Synonym:
        """Add a new synonym to the current entity.

        Arguments:
            description (`str`): The alternate definition of the entity, or a
                related human-readable synonym.
            scope (`str` or `None`): An optional synonym scope. Must be either
                **EXACT**, **RELATED**, **BROAD** or **NARROW** if given.
            type (`~pronto.SynonymType` or `None`): An optional synonym type.
                Must be declared in the header of the current ontology.
            xrefs (iterable of `Xref`, or `None`): A collection of database
                cross-references backing the origin of the synonym.

        Raises:
            ValueError: when given an invalid synonym type or scope.

        Returns:
            `~pronto.Synonym`: A new synonym for the terms. The synonym is
            already added to the `Entity.synonyms` collection.

        """
        # check the type is declared in the current ontology
        if type is None:
            type_id: Optional[str] = None
        else:
            try:
                type_id = self._ontology().get_synonym_type(type.id).id
            except KeyError as ke:
                raise ValueError(f"undeclared synonym type {type.id!r}") from ke

        data = SynonymData(description, scope, type_id, xrefs=xrefs)
        self._data().synonyms.add(data)
        return Synonym(self._ontology(), data)
class EntitySet(typing.Generic[_E], typing.MutableSet[_E]):
    """A specialized mutable set to store `Entity` instances.

    Only the identifiers of the entities are stored, together with a
    reference to the ontology they come from; `Entity` objects are looked up
    in that ontology when the set is iterated. All members must come from the
    same ontology, and an empty set is not bound to any ontology
    (``_ontology`` is `None`).
    """

    # --- Magic methods ------------------------------------------------------

    def __init__(self, entities: Optional[Iterable[_E]] = None) -> None:
        """Create a new set, optionally filled from ``entities``.

        Raises:
            TypeError: when an element is not an `Entity` (only checked when
                ``__debug__`` is true).
            ValueError: when the entities come from different ontologies.

        """
        self._ids: Set[str] = set()
        self._ontology: "Optional[Ontology]" = None

        for entity in entities if entities is not None else ():
            if __debug__ and not isinstance(entity, Entity):
                err_msg = "'entities' must be iterable of Entity, not {}"
                raise TypeError(err_msg.format(type(entity).__name__))
            # The first entity decides which ontology the set is bound to.
            if self._ontology is None:
                self._ontology = entity._ontology()
            if self._ontology is not entity._ontology():
                raise ValueError("entities do not originate from the same ontology")
            self._ids.add(entity.id)

    def __contains__(self, other: object):
        """Check membership by identifier; non-entities are never members."""
        if isinstance(other, Entity):
            return other.id in self._ids
        return False

    def __iter__(self) -> Iterator[_E]:
        """Yield the entities, looked up by id in the bound ontology."""
        return map(lambda t: self._ontology[t], iter(self._ids))  # type: ignore

    def __len__(self):
        """Return the number of stored identifiers."""
        return len(self._ids)

    def __repr__(self):
        """Build a representation listing the `repr` of every member."""
        ontology = self._ontology
        elements = (repr(ontology[id_]) for id_ in self._ids)
        return f"{type(self).__name__}({{{', '.join(elements)}}})"

    def __iand__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            self._ids &= other._ids
        else:
            super().__iand__(other)
        # An emptied set is no longer bound to an ontology.
        if not self._ids:
            self._ontology = None
        return self

    def __and__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__and__(other._ids)
            s._ontology = self._ontology if s._ids else None
        else:
            s = type(self)(super().__and__(other))
        return s

    def __ior__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if not isinstance(other, EntitySet):
            other = type(self)(other)
        self._ids |= other._ids
        self._ontology = self._ontology or other._ontology
        return self

    def __or__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__or__(other._ids)
            s._ontology = self._ontology or other._ontology
        else:
            s = type(self)(super().__or__(other))
        return s

    def __isub__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            self._ids -= other._ids
        else:
            super().__isub__(other)
        # An emptied set is no longer bound to an ontology.
        if not self._ids:
            self._ontology = None
        return self

    def __sub__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__sub__(other._ids)
            # As in `__and__` / `__isub__`: an empty result must not stay
            # bound to an ontology, otherwise `add` would reject entities
            # of any other ontology on a set that contains nothing.
            s._ontology = self._ontology if s._ids else None
        else:
            s = type(self)(super().__sub__(other))
        return s

    def __ixor__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            self._ids ^= other._ids
            self._ontology = self._ontology or other._ontology
        else:
            super().__ixor__(other)
        # An emptied set is no longer bound to an ontology.
        if not self._ids:
            self._ontology = None
        return self

    def __xor__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__xor__(other._ids)
            s._ontology = self._ontology or other._ontology
        else:
            s = type(self)(super().__xor__(other))
        if not s._ids:
            s._ontology = None
        return s

    # --- Methods ------------------------------------------------------------

    def add(self, entity: _E) -> None:
        """Add ``entity`` to the set.

        Raises:
            ValueError: when the set is already bound to another ontology
                than the one ``entity`` comes from.

        """
        if self._ontology is None:
            self._ontology = entity._ontology()
        elif self._ontology is not entity._ontology():
            raise ValueError("cannot use `Entity` instances from different `Ontology`")
        self._ids.add(entity.id)

    def clear(self) -> None:
        """Remove every element and unbind the set from its ontology."""
        self._ids.clear()
        self._ontology = None

    def discard(self, entity: _E) -> None:
        """Remove ``entity`` from the set if it is a member."""
        self._ids.discard(entity.id)

    def pop(self) -> _E:
        """Remove and return an arbitrary entity of the set."""
        id_ = self._ids.pop()
        # Resolve the entity before the ontology reference is possibly reset.
        entity = self._ontology[id_]  # type: ignore
        if not self._ids:
            self._ontology = None
        return entity  # type: ignore

    def remove(self, entity: _E):
        """Remove ``entity`` from the set.

        Raises:
            ValueError: when ``entity`` comes from another ontology than the
                one the set is bound to.
            KeyError: when the identifier of ``entity`` is not in the set.

        """
        if self._ontology is not None and self._ontology is not entity._ontology():
            raise ValueError("cannot use `Entity` instances from different `Ontology`")
        self._ids.remove(entity.id)

    # --- Attributes ---------------------------------------------------------

    @property
    def ids(self) -> FrozenSet[str]:
        """`frozenset` of `str`: A snapshot of the stored identifiers."""
        return frozenset(self._ids)

    @property
    def alternate_ids(self) -> FrozenSet[str]:
        """`frozenset` of `str`: The alternate ids of every member, merged."""
        return frozenset(alt_id for entity in self for alt_id in entity.alternate_ids)

    @property
    def names(self) -> FrozenSet[str]:
        """`frozenset`: The ``name`` attribute of every member."""
        return frozenset(map(operator.attrgetter("name"), iter(self)))