Source code for pronto.entity

import datetime
import dataclasses
import operator
import typing
import weakref
from dataclasses import field
from typing import AbstractSet, Any, Dict, FrozenSet, Iterable, Iterator, Optional, Set

from ..definition import Definition
from ..pv import PropertyValue
from ..synonym import Synonym, SynonymData, SynonymType
from ..utils.meta import dataclass, roundrepr, typechecked
from ..xref import Xref

if typing.TYPE_CHECKING:
    from ..ontology import _DataGraph, Ontology
    from ..relationship import Relationship, RelationshipSet
    from .attributes import Relationships

__all__ = ["EntityData", "Entity", "EntitySet"]
_D = typing.TypeVar("_D", bound="EntityData")
_E = typing.TypeVar("_E", bound="Entity")
_S = typing.TypeVar("_S", bound="EntitySet")


@dataclass(init=True, slots=True, weakref_slot=True)
class EntityData:
    """Raw storage for the attributes of a single entity.

    `Entity` instances defined in this module hold a weak reference to an
    `EntityData` object and read/write these fields through their
    properties. References to other entities are stored as identifier
    strings rather than as objects.
    """

    # Only `id` is required; every other field has a default.
    id: str
    alternate_ids: Set[str] = field(default_factory=set)
    annotations: Set[PropertyValue] = field(default_factory=set)
    anonymous: bool = field(default=False)
    builtin: bool = field(default=False)
    comment: Optional[str] = field(default=None)
    # Identifiers of the entities exposed by `Entity.consider`.
    consider: Set[str] = field(default_factory=set)
    created_by: Optional[str] = field(default=None)
    creation_date: Optional[datetime.datetime] = field(default=None)
    # Identifiers of the entities exposed by `Entity.disjoint_from`.
    disjoint_from: Set[str] = field(default_factory=set)
    definition: Optional[Definition] = field(default=None)
    # Identifiers of the entities exposed by `Entity.equivalent_to`.
    equivalent_to: Set[str] = field(default_factory=set)
    name: Optional[str] = field(default=None)
    namespace: Optional[str] = field(default=None)
    obsolete: bool = field(default=False)
    # Maps a relationship identifier to the identifiers of the linked entities.
    relationships: Dict[str, Set[str]] = field(default_factory=dict)
    # Identifiers of the entities exposed by `Entity.replaced_by`.
    replaced_by: Set[str] = field(default_factory=set)
    subsets: Set[str] = field(default_factory=set)
    synonyms: Set[SynonymData] = field(default_factory=set)
    # Identifiers of the entities exposed by `Entity.union_of`.
    union_of: Set[str] = field(default_factory=set)
    xrefs: Set[Xref] = field(default_factory=set)

    # Declaration for static type checkers only; never executed at runtime.
    if typing.TYPE_CHECKING:
        __annotations__: Dict[str, str]



[docs]
class Entity(typing.Generic[_D, _S]):
    """An entity in the ontology graph.

    With respects to the OBO semantics, an `Entity` is either a term or a
    relationship in the ontology graph. Any entity has a unique identifier as
    well as some common properties.
    """

    # Two alternative layouts are defined depending on the interpreter mode.
    # In both, `self._data()` is how the rest of the class reaches the
    # underlying `EntityData`, and the entity only keeps a weak reference
    # to that data.
    if __debug__ or typing.TYPE_CHECKING:

        # Debug / type-checking layout: the weak reference is kept in a
        # private (name-mangled) attribute and `_data` is a real method
        # that fails loudly if the referent no longer exists.
        __data: "weakref.ReferenceType[_D]"
        __slots__: Iterable[str] = ()

        def __init__(self, ontology: "Ontology", data: "_D"):
            """Bind the entity to `ontology` and weakly reference `data`."""
            self.__data = weakref.ref(data)
            # The identifier is copied so `id` stays readable without the data.
            self.__id = data.id
            self.__ontology = ontology

        def _data(self) -> "EntityData":
            """Return the referenced data object.

            Raises:
                RuntimeError: if the weakly referenced data has been
                    deallocated.
            """
            rdata = self.__data()
            if rdata is None:
                raise RuntimeError("internal data was deallocated")
            return rdata

    else:

        # Layout used when `__debug__` is false: the weak reference object
        # itself is stored as `_data`, so `self._data()` calls the weakref
        # directly and the deallocation check above is skipped.
        __slots__: Iterable[str] = ("_data",)  # type: ignore

        def __init__(self, ontology: "Ontology", data: "_D"):
            """Bind the entity to `ontology` and weakly reference `data`."""
            self._data = weakref.ref(data)  # type: ignore
            self.__ontology = ontology
            self.__id = data.id

    # `_Set` is the `EntitySet` type instantiated by the set-valued
    # properties below (`consider`, `disjoint_from`, ...). Both attributes
    # are `NotImplemented` placeholders here; presumably concrete subclasses
    # override them -- TODO confirm against the subclasses.
    _Set: typing.ClassVar[typing.Type[_S]] = NotImplemented
    # Not used anywhere in this class; NOTE(review): looks like a hook to
    # select the relevant data graph of an ontology -- confirm with callers.
    _data_getter: typing.Callable[["Ontology"], "_DataGraph"] = NotImplemented

    # --- Private helpers ----------------------------------------------------

    def _ontology(self) -> "Ontology":
        """Return the ontology object this entity was created with."""
        return self.__ontology

    # --- Magic Methods ------------------------------------------------------


[docs]
    def __eq__(self, other: Any) -> bool:
        """Compare two entities by identifier.

        Returns:
            `True` or `False` when `other` is an `Entity`, depending on
            whether both identifiers are equal. For any other type,
            `NotImplemented` is returned (like the ordering methods of this
            class do) so that Python can try the reflected comparison of
            `other`; when that is not available either, ``==`` still
            evaluates to `False`.
        """
        if isinstance(other, Entity):
            return self.id == other.id
        return NotImplemented



[docs]
    def __lt__(self, other):
        """Order entities by identifier; defer for non-`Entity` operands."""
        if not isinstance(other, Entity):
            return NotImplemented
        return self.id < other.id



[docs]
    def __le__(self, other):
        """Order entities by identifier; defer for non-`Entity` operands."""
        if not isinstance(other, Entity):
            return NotImplemented
        return self.id <= other.id



[docs]
    def __gt__(self, other):
        """Order entities by identifier; defer for non-`Entity` operands."""
        if not isinstance(other, Entity):
            return NotImplemented
        return self.id > other.id



[docs]
    def __ge__(self, other):
        """Order entities by identifier; defer for non-`Entity` operands."""
        if not isinstance(other, Entity):
            return NotImplemented
        return self.id >= other.id



[docs]
    def __hash__(self):
        """Hash the entity by its identifier, consistently with `__eq__`."""
        identifier = self.id
        return hash(identifier)



[docs]
    def __repr__(self):
        """Build the representation from the class name, the id and the name."""
        class_name = type(self).__name__
        # The name is passed together with `None`; presumably `roundrepr`
        # treats the second item as the default to omit -- confirm in
        # `utils.meta`.
        name_field = (self.name, None)
        return roundrepr.make(class_name, self.id, name=name_field)


    # --- Data descriptors ---------------------------------------------------

    @property
    def alternate_ids(self) -> Set[str]:
        """`set` of `str`: A set of alternate IDs for this entity."""
        # The stored set itself is returned (not a copy), so in-place edits
        # made by the caller change the entity data.
        return self._data().alternate_ids

    @alternate_ids.setter  # type: ignore
    def alternate_ids(self, ids: Iterable[str]):
        # Build a new set so the entity does not share state with `ids`.
        self._data().alternate_ids = set(ids)

    @property
    def annotations(self) -> Set[PropertyValue]:
        """`set` of `PropertyValue`: Annotations relevant to the entity."""
        # The stored set itself is returned (not a copy), so in-place edits
        # made by the caller change the entity data.
        return self._data().annotations

    @annotations.setter
    def annotations(self, value: Iterable[PropertyValue]) -> None:
        # Build a new set so the entity does not share state with `value`.
        self._data().annotations = set(value)

    @property
    def anonymous(self) -> bool:
        """`bool`: Whether or not the entity has an anonymous id.

        Semantics of anonymous entities are the same as B-Nodes in RDF.
        """
        return self._data().anonymous

    @anonymous.setter
    def anonymous(self, value: bool):
        # Stored as given: unlike `builtin`, this setter is not wrapped
        # with `typechecked`.
        self._data().anonymous = value

    @property
    def builtin(self) -> bool:
        """`bool`: Whether or not the entity is built-in to the OBO format.

        ``pronto`` uses this tag on the ``is_a`` relationship, which is the
        axiomatic to the OBO language but treated as a relationship in the
        library.
        """
        return self._data().builtin

    # NOTE(review): `typechecked` presumably validates `value` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @builtin.setter  # type: ignore
    @typechecked(property=True)
    def builtin(self, value: bool):
        self._data().builtin = value

    @property
    def comment(self) -> Optional[str]:
        """`str` or `None`: A comment about the current entity.

        Comments in ``comment`` clauses are guaranteed to be conserved by OBO
        parsers and serializers, unlike bang comments. A non `None` `comment`
        is semantically equivalent to a ``rdfs:comment`` in OWL2. When parsing
        from OWL, several RDF comments will be merged together into a single
        ``comment`` clause spanning over multiple lines.
        """
        return self._data().comment

    @comment.setter
    def comment(self, value: Optional[str]):
        # Stored as given; `None` clears the comment.
        self._data().comment = value

    @property
    def consider(self) -> _S:
        """`EntitySet`: A set of potential substitutes for an obsolete term.

        An obsolete entity can provide one or more entities which may be
        appropriate substitutes, but needs to be looked at carefully by a
        human expert before the replacement is done.

        Note:
            The returned set wraps the identifier set stored on the entity,
            so in-place changes to it (``add``, ``discard``, ...) are written
            through to the entity. Assigning to the property stores a copy
            of the given identifiers instead.

        See Also:
            `~Entity.replaced_by`, which provides a set of entities suitable
            for automatic replacement.

        """
        s = self._Set()
        s._ids = self._data().consider
        s._ontology = self._ontology()
        return s

    @consider.setter
    def consider(self, consider: Iterable[_E]) -> None:
        if isinstance(consider, EntitySet):
            # Copy the identifiers: storing `consider._ids` itself would make
            # this entity share one mutable set with the source (possibly
            # the data of another entity).
            data = set(consider._ids)
        else:
            data = {entity.id for entity in consider}
        self._data().consider = data

    @property
    def created_by(self) -> Optional[str]:
        """`str` or `None`: The name of the creator of the entity, if any.

        This property gets translated to a ``dc:creator`` annotation in OWL2,
        which has very broad semantics. Some OBO ontologies may instead use
        other annotation properties such as the ones found in `Information
        Interchange Ontology <http://www.obofoundry.org/ontology/iao.html>`_,
        which can be accessed in the `annotations` attribute of the entity,
        if any.
        """
        return self._data().created_by

    # NOTE(review): `typechecked` presumably validates `value` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @created_by.setter  # type: ignore
    @typechecked(property=True)
    def created_by(self, value: Optional[str]):
        self._data().created_by = value

    @property
    def creation_date(self) -> Optional[datetime.datetime]:
        """`~datetime.datetime` or `None`: The date the entity was created."""
        return self._data().creation_date

    # NOTE(review): `typechecked` presumably validates `value` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @creation_date.setter  # type: ignore
    @typechecked(property=True)
    def creation_date(self, value: Optional[datetime.datetime]):
        # Stored as given; `None` clears the date.
        self._data().creation_date = value

    @property
    def definition(self) -> Optional[Definition]:
        """`Definition` or `None`: The definition of the current entity.

        Definitions in OBO are intended to be human-readable text describing
        the entity, with some additional cross-references if possible.

        Example:
            >>> hp = pronto.Ontology.from_obo_library("hp.obo")
            >>> term = hp["HP:0009882"]
            >>> term.name
            'Short distal phalanx of finger'
            >>> str(term.definition)
            'Short distance from the end of the finger to the most distal...'
            >>> sorted(term.definition.xrefs)
            [Xref('HPO:probinson'), Xref('PMID:19125433')]

        """
        return self._data().definition

    # NOTE(review): `typechecked` presumably validates `definition` against
    # the annotation at runtime -- confirm in `utils.meta`.
    @definition.setter  # type: ignore
    @typechecked(property=True)
    def definition(self, definition: Optional[Definition]):
        # The object is stored as given (no copy); `None` clears it.
        self._data().definition = definition

    @property
    def disjoint_from(self) -> _S:
        """`EntitySet`: The entities declared as disjoint from this entity.

        Two entities are disjoint if they have no instances in common. Two
        entities that are disjoint cannot share any subentities, but the
        opposite is not always true.

        Note:
            The returned set wraps the identifier set stored on the entity,
            so in-place changes to it are written through to the entity.
            Assigning to the property stores a copy of the given
            identifiers instead.
        """
        s = self._Set()
        s._ids = self._data().disjoint_from
        s._ontology = self._ontology()
        return s

    @disjoint_from.setter
    def disjoint_from(self, disjoint: Iterable[_E]):
        if isinstance(disjoint, EntitySet):
            # Copy the identifiers: storing `disjoint._ids` itself would make
            # this entity share one mutable set with the source.
            data = set(disjoint._ids)
        else:
            data = {entity.id for entity in disjoint}
        self._data().disjoint_from = data

    @property
    def equivalent_to(self) -> _S:
        """`EntitySet`: The entities declared as equivalent to this entity.

        Note:
            The returned set wraps the identifier set stored on the entity,
            so in-place changes to it are written through to the entity.
            Assigning to the property stores a copy of the given
            identifiers instead.
        """
        s = self._Set()
        s._ids = self._data().equivalent_to
        s._ontology = self._ontology()
        return s

    @equivalent_to.setter
    def equivalent_to(self, entities: Iterable[_E]):
        if isinstance(entities, EntitySet):
            # Copy the identifiers: storing `entities._ids` itself would make
            # this entity share one mutable set with the source.
            data = set(entities._ids)
        else:
            data = {entity.id for entity in entities}
        self._data().equivalent_to = data

    @property
    def id(self) -> str:
        """`str`: The OBO identifier of the entity.

        Identifiers can be either prefixed (e.g. ``MS:1000031``), unprefixed
        (e.g. ``part_of``) or given as plain URLs. Identifiers cannot be
        edited.
        """
        # Captured from `data.id` in `__init__`; no setter is defined, which
        # is what makes the property read-only.
        return self.__id

    @property
    def name(self) -> Optional[str]:
        """`str` or `None`: The name of the entity.

        Names are formally equivalent to ``rdf:label`` in OWL2. The OBO format
        version 1.4 made names optional to improve OWL interoperability, as
        labels are optional in OWL.
        """
        return self._data().name

    # NOTE(review): `typechecked` presumably validates `value` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @name.setter  # type: ignore
    @typechecked(property=True)
    def name(self, value: Optional[str]):
        # Stored as given; `None` removes the name.
        self._data().name = value

    @property
    def namespace(self) -> Optional[str]:
        """`str` or `None`: The namespace this entity is defined in."""
        return self._data().namespace

    # NOTE(review): `typechecked` presumably validates `ns` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @namespace.setter  # type: ignore
    @typechecked(property=True)
    def namespace(self, ns: Optional[str]):
        # Stored as given; `None` removes the namespace.
        self._data().namespace = ns

    @property
    def obsolete(self) -> bool:
        """`bool`: Whether or not the entity is obsolete.

        Hint:
            All OBO entities can be made obsolete through a boolean flag, and
            map to one or several replacements. When querying an obsolete
            entity, ``pronto`` will **not** attempt to perform any kind of
            replacement itself ::

                >>> ms = pronto.Ontology.from_obo_library("ms.obo")
                >>> term = ms["MS:1001414"]
                >>> term
                Term('MS:1001414', name='MGF scans')
                >>> term.obsolete
                True

            To always get the up-to-date, non-obsolete entity, you could use
            the following snippet, going through a term replacement if there
            is no ambiguity ::

                >>> while term.obsolete:
                ...     if len(term.replaced_by) != 1:
                ...         raise ValueError(f"no replacement for {term.id}")
                ...     term = term.replaced_by.pop()
                >>> term
                Term('MS:1000797', name='peak list scans')

        See Also:
            `~.Entity.consider` and `~Entity.replaced_by`, storing some
            replacement options for an obsolete entity.

        """
        return self._data().obsolete

    # NOTE(review): `typechecked` presumably validates `value` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @obsolete.setter  # type: ignore
    @typechecked(property=True)
    def obsolete(self, value: bool):
        # Only the flag is changed here; `consider` and `replaced_by` are
        # left untouched.
        self._data().obsolete = value

    @property
    def relationships(self: _E) -> "Relationships[_E, _S]":
        """`~.Relationships`: The links from an entity to other entities.

        This property returns an object that maps a `~.Relationship` to
        an `~.EntitySet` (either a `~.TermSet` for `Term.relationships`, or
        a `~.RelationshipSet` for `Relationship.relationships`).

        Hint:
            The mapping is mutable, so relationships can be created or removed
            using the usual interface of a `~collections.abc.MutableMapping`.

        Example:
            Get the ``MS:1000004`` term (*sample mass*) from the Mass
            Spectrometry ontology::

                >>> ms = pronto.Ontology.from_obo_library("ms.obo")
                >>> sample_mass = ms["MS:1000004"]

            Then use the ``relationships`` property to get the relevant
            unit from the Unit Ontology::

                >>> sorted(sample_mass.relationships.keys())
                [Relationship('has_units', name='has_units')]
                >>> sample_mass.relationships[ms.get_relationship('has_units')]
                TermSet({Term('UO:0000021', name='gram')})

        """
        # Imported at call time: at module level `.attributes` is only
        # imported for type checking (presumably to avoid an import cycle).
        from .attributes import Relationships

        view = Relationships(self)
        return view

    @relationships.setter
    def relationships(self, rels: typing.Mapping["Relationship", Iterable[_E]]):
        # Translate the object mapping into the id-based form kept on the
        # data object; nothing is stored until the whole mapping is built.
        by_id = {}
        for relation, entities in rels.items():
            relation_id = relation.id
            by_id[relation_id] = {entity.id for entity in entities}
        self._data().relationships = by_id

    @property
    def replaced_by(self) -> _S:
        """`EntitySet`: A set of replacements for an obsolete term.

        An obsolete entity can provide one or more replacement that can
        safely be used to automatically reassign instances to non-obsolete
        classes.

        Note:
            The returned set wraps the identifier set stored on the entity,
            so in-place changes to it (e.g. ``pop``) are written through to
            the entity. Assigning to the property stores a copy of the given
            identifiers instead.

        See Also:
            `~Entity.consider`, which provides a set of entities suitable
            for replacement but requiring expert curation.

        """
        s = self._Set()
        s._ids = self._data().replaced_by
        s._ontology = self._ontology()
        return s

    @replaced_by.setter
    def replaced_by(self, replacements: Iterable[_E]) -> None:
        if isinstance(replacements, EntitySet):
            # Copy the identifiers: storing `replacements._ids` itself would
            # make this entity share one mutable set with the source.
            data = set(replacements._ids)
        else:
            data = {entity.id for entity in replacements}
        self._data().replaced_by = data

    @property
    def subsets(self) -> FrozenSet[str]:
        """`frozenset` of `str`: The subsets containing this entity."""
        stored = self._data().subsets
        return frozenset(stored)

    @subsets.setter  # type: ignore
    @typechecked(property=True)
    def subsets(self, subsets: FrozenSet[str]):
        # Every subset must be declared in the `subsetdefs` of the ontology
        # metadata; the first undeclared one aborts the assignment.
        metadata = self._ontology().metadata
        known = {subsetdef.name for subsetdef in metadata.subsetdefs}
        for candidate in subsets:
            if candidate not in known:
                raise ValueError(f"undeclared subset: {candidate!r}")
        self._data().subsets = set(subsets)

    @property
    def synonyms(self) -> FrozenSet[Synonym]:
        """`frozenset` of `Synonym`: A set of synonyms for this entity."""
        ontology = self._ontology()
        stored = self._data().synonyms
        # Each stored `SynonymData` is wrapped into a `Synonym` bound to the
        # ontology of this entity.
        return frozenset(Synonym(ontology, syndata) for syndata in stored)

    @synonyms.setter  # type: ignore
    @typechecked(property=True)
    def synonyms(self, synonyms: FrozenSet[Synonym]):
        # Only the underlying data objects are kept, not the wrappers.
        unwrapped = set()
        for syn in synonyms:
            unwrapped.add(syn._data())
        self._data().synonyms = unwrapped

    @property
    def union_of(self) -> _S:
        """`EntitySet`: The entities stored as ``union_of`` for this entity.

        Note:
            The returned set wraps the identifier set stored on the entity,
            so in-place changes to it are written through to the entity
            (without the cardinality check done on assignment). Assigning
            to the property stores a copy of the given identifiers instead.
        """
        s = self._Set()
        s._ids = self._data().union_of
        s._ontology = self._ontology()
        return s

    @union_of.setter
    def union_of(self, union_of: Iterable[_E]) -> None:
        """Replace the ``union_of`` entities.

        Raises:
            TypeError: when an element of a plain iterable is not an
                `Entity`.
            ValueError: when exactly one distinct entity is given.
        """
        if isinstance(union_of, EntitySet):
            # Copy the identifiers: storing `union_of._ids` itself would make
            # this entity share one mutable set with the source.
            data = set(union_of._ids)
        else:
            data = set()
            for entity in union_of:
                if not isinstance(entity, Entity):
                    ty = type(entity).__name__
                    raise TypeError(f"expected `Entity`, found {ty}")
                data.add(entity.id)
        # An empty set is accepted (it clears the field); a single member
        # is rejected.
        if len(data) == 1:
            raise ValueError("'union_of' cannot have a cardinality of 1")
        self._data().union_of = data

    @property
    def xrefs(self) -> FrozenSet[Xref]:
        """`frozenset` of `Xref`: A set of database cross-references.

        Xrefs can be used to describe an analogous entity in another
        vocabulary, such as a database or a semantic knowledge base.
        """
        # An immutable snapshot is returned; changes must go through the
        # setter.
        return frozenset(self._data().xrefs)

    # NOTE(review): `typechecked` presumably validates `xrefs` against the
    # annotation at runtime -- confirm in `utils.meta`.
    @xrefs.setter  # type: ignore
    @typechecked(property=True)
    def xrefs(self, xrefs: FrozenSet[Xref]):
        # Stored as a new mutable set built from the given elements.
        self._data().xrefs = set(xrefs)

    # --- Convenience methods ------------------------------------------------


[docs]
    def add_synonym(
        self,
        description: str,
        scope: Optional[str] = None,
        type: Optional[SynonymType] = None,
        xrefs: Optional[Iterable[Xref]] = None,
    ) -> Synonym:
        """Add a new synonym to the current entity.

        Arguments:
            description (`str`): The alternate definition of the entity, or a
                related human-readable synonym.
            scope (`str` or `None`): An optional synonym scope. Must be either
                **EXACT**, **RELATED**, **BROAD** or **NARROW** if given.
            type (`~pronto.SynonymType` or `None`): An optional synonym type.
                Must be declared in the header of the current ontology.
            xrefs (iterable of `Xref`, or `None`): A collections of database
                cross-references backing the origin of the synonym.

        Raises:
            ValueError: when given an invalid synonym type or scope.

        Returns:
            `~pronto.Synonym`: A new synonym for the terms. The synonym is
            already added to the `Entity.synonyms` collection.

        """
        # Resolve the type through the ontology, so that only declared
        # synonym types are accepted; no type means no identifier.
        type_id: Optional[str] = None
        if type is not None:
            try:
                type_id = self._ontology().get_synonym_type(type.id).id
            except KeyError as ke:
                raise ValueError(f"undeclared synonym type {type.id!r}") from ke

        # Register the raw data on the entity, then hand back a wrapper
        # bound to the ontology.
        synonym_data = SynonymData(description, scope, type_id, xrefs=xrefs)
        self._data().synonyms.add(synonym_data)
        return Synonym(self._ontology(), synonym_data)




class EntitySet(typing.Generic[_E], typing.MutableSet[_E]):
    """A specialized mutable set to store `Entity` instances.

    Only the identifiers of the members are stored (in ``_ids``); the
    entities themselves are looked up in ``_ontology`` when iterating.
    All members of a set must originate from the same ontology. A set
    emptied by one of its own operations drops its ontology reference,
    so that it can later be filled with entities of any single ontology.
    """

    # --- Magic methods ------------------------------------------------------

    def __init__(self, entities: Optional[Iterable[_E]] = None) -> None:
        """Create a new set, optionally filled with `entities`.

        Raises:
            TypeError: when an element is not an `Entity` (only checked
                while ``__debug__`` is true).
            ValueError: when the elements come from different ontologies.
        """
        self._ids: Set[str] = set()
        self._ontology: "Optional[Ontology]" = None

        for entity in entities if entities is not None else ():
            if __debug__ and not isinstance(entity, Entity):
                err_msg = "'entities' must be iterable of Entity, not {}"
                raise TypeError(err_msg.format(type(entity).__name__))
            # The first element decides which ontology the set is bound to.
            if self._ontology is None:
                self._ontology = entity._ontology()
            if self._ontology is not entity._ontology():
                raise ValueError("entities do not originate from the same ontology")
            self._ids.add(entity.id)

    def __contains__(self, other: object):
        """Check membership by identifier; non-entities are never members."""
        if isinstance(other, Entity):
            return other.id in self._ids
        return False

    def __iter__(self) -> Iterator[_E]:
        """Iterate over the members, resolving each id in the ontology."""
        return map(lambda t: self._ontology[t], iter(self._ids))  # type: ignore

    def __len__(self):
        """Return the number of stored identifiers."""
        return len(self._ids)

    def __repr__(self):
        ontology = self._ontology
        elements = (repr(ontology[id_]) for id_ in self._ids)
        return f"{type(self).__name__}({{{', '.join(elements)}}})"

    def __iand__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            # Fast path: intersect the identifier sets directly.
            self._ids &= other._ids
        else:
            super().__iand__(other)
        if not self._ids:
            self._ontology = None
        return self

    def __and__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__and__(other._ids)
            s._ontology = self._ontology if s._ids else None
        else:
            s = type(self)(super().__and__(other))
        return s

    def __ior__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if not isinstance(other, EntitySet):
            other = type(self)(other)
        self._ids |= other._ids
        # Compare against `None` explicitly instead of relying on the
        # truthiness of the ontology object.
        if self._ontology is None:
            self._ontology = other._ontology
        return self

    def __or__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__or__(other._ids)
            if self._ontology is not None:
                s._ontology = self._ontology
            else:
                s._ontology = other._ontology
        else:
            s = type(self)(super().__or__(other))
        return s

    def __isub__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            self._ids -= other._ids
        else:
            super().__isub__(other)
        if not self._ids:
            self._ontology = None
        return self

    def __sub__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__sub__(other._ids)
            # Like `__and__` and `__xor__`, an empty result is left unbound.
            s._ontology = self._ontology if s._ids else None
        else:
            s = type(self)(super().__sub__(other))
        return s

    def __ixor__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            self._ids ^= other._ids
            if self._ontology is None:
                self._ontology = other._ontology
        else:
            super().__ixor__(other)
        if not self._ids:
            self._ontology = None
        return self

    def __xor__(self, other: AbstractSet[_E]) -> "EntitySet[_E]":
        if isinstance(other, EntitySet):
            s = type(self)()
            s._ids = self._ids.__xor__(other._ids)
            if self._ontology is not None:
                s._ontology = self._ontology
            else:
                s._ontology = other._ontology
        else:
            s = type(self)(super().__xor__(other))
        if not s._ids:
            s._ontology = None
        return s

    # --- Methods ------------------------------------------------------------

    def add(self, entity: _E) -> None:
        """Add `entity` to the set.

        Raises:
            ValueError: when the set is already bound to another ontology
                than the one of `entity`.
        """
        if self._ontology is None:
            self._ontology = entity._ontology()
        elif self._ontology is not entity._ontology():
            raise ValueError("cannot use `Entity` instances from different `Ontology`")
        self._ids.add(entity.id)

    def clear(self) -> None:
        """Remove all members and unbind the set from its ontology."""
        self._ids.clear()
        self._ontology = None

    def discard(self, entity: _E) -> None:
        """Remove `entity` from the set if it is a member."""
        self._ids.discard(entity.id)
        # Same rule as `pop` and `clear`: an emptied set is unbound.
        if not self._ids:
            self._ontology = None

    def pop(self) -> _E:
        """Remove and return an arbitrary member.

        Raises:
            KeyError: when the set is empty.
        """
        id_ = self._ids.pop()
        # Resolve the entity before the ontology reference may be dropped.
        entity = self._ontology[id_]  # type: ignore
        if not self._ids:
            self._ontology = None
        return entity  # type: ignore

    def remove(self, entity: _E):
        """Remove `entity` from the set.

        Raises:
            ValueError: when `entity` belongs to another ontology than the
                one the set is bound to.
            KeyError: when `entity` is not a member.
        """
        if self._ontology is not None and self._ontology is not entity._ontology():
            raise ValueError("cannot use `Entity` instances from different `Ontology`")
        self._ids.remove(entity.id)
        # Same rule as `pop` and `clear`: an emptied set is unbound.
        if not self._ids:
            self._ontology = None

    # --- Attributes ---------------------------------------------------------

    @property
    def ids(self) -> FrozenSet[str]:
        """`frozenset` of `str`: A snapshot of the member identifiers."""
        return frozenset(self._ids)

    @property
    def alternate_ids(self) -> FrozenSet[str]:
        """`frozenset` of `str`: The alternate ids of all the members."""
        return frozenset(alt for entity in self for alt in entity.alternate_ids)

    @property
    def names(self) -> FrozenSet[str]:
        """`frozenset`: The ``name`` attribute of every member.

        A member whose ``name`` is `None` contributes `None` to the result.
        """
        return frozenset(map(operator.attrgetter("name"), iter(self)))