Source code for phileas.iteration.base

"""
This module contains the definition of the base type and classes used for
iteration (data tree, pseudo data tree and iteration tree).
"""

from __future__ import annotations

import dataclasses
import math
import typing
from abc import ABC, abstractmethod
from copy import deepcopy
from dataclasses import dataclass, field
from enum import Enum
from typing import Callable, Generic, TypeVar

from typing_extensions import Self, assert_never

from ..logging import logger
from ..utility import Sentinel

#################
### Data tree ###
#################

#: Data values that can be used as data tree leaves
DataLiteral = typing.Union[None, bool, str, int, float, "_NoDefault"]

#: Dictionary keys
Key = bool | str | int | float

#: A data tree consists of literal leaves, and dictionary or list nodes
DataTree = DataLiteral | dict[Key, "DataTree"] | list["DataTree"]

########################
### Pseudo data tree ###
########################

# Note: a DataTree is a PseudoDataTree, but mypy seems not to understand it.

#: A leave of a pseudo data tree is either a data tree leave, or a non-trivial
#: iteration tree leave.
PseudoDataLiteral = typing.Union[DataLiteral, "IterationLeaf"]

#: A pseudo data tree is a data tree whose leaves can be non literal iteration
#: leaves.
PseudoDataTree = (
    PseudoDataLiteral | dict[Key, "PseudoDataTree"] | list["PseudoDataTree"]
)

######################
### Iteration tree ###
######################


### Iteration ###


[docs] class DefaultIndex(Sentinel): """ Index of the default value of an iteration tree. """ pass
T = TypeVar("T", bound="IterationTree", covariant=True)
[docs] class TreeIterator(ABC, Generic[T]): """ Iteration tree iterator. Compared to a usual iterator, it supports forward and backward iteration, and is endlessly usable. This means that, whenever it is "exhausted" (:py:meth:`__next__` raises ``StopIteration``), it can either be reset to its starting position with :py:meth:`reset`, or its iteration direction can be switched using :py:meth:`reverse`. Additionally, it supports random access with the :py:meth:`__getitem__` method, which uses :py:meth:`update` under the hood. """ #: Reference to the tree being iterated over. tree: T #: Current iteration direction of the iterator. #: #: .. warning #: This attribute is managed by :py:class:`TreeIterator`, and should #: thus not be modified by sub-classes. However, it can be read. __forward: bool #: Position of the last value that was yielded. Valid values start at -1 #: (backward-exhausted iterator, or forward iteration start), and go up to #: :py:attr:`size` (forward-exhausted iterator, or backward iteration #: start). #: #: It can be directly modified by :py:meth:`update`. #: #: .. warning #: This attribute is managed by :py:class:`TreeIterator`, thus it must #: not be modified by sub-classes. However, it can be read. position: int #: Cached size of the iterated tree. Consider using this instead of ``len #: (self.tree)``, as computing the length of a tree can be an expensive #: operation. size: int | None def __init__(self, tree: T) -> None: self.__forward = True self.position = -1 self.tree = tree self.size = tree.safe_len() def __iter__(self: Self) -> Self: return self
[docs] def reset(self): """ Reset the internal state of the iterator, so that its next value will be the first iterated value in the current direction. It takes into account the value of forward, going either to the start or the end of the iterated collection. """ if self.__forward: self.position = -1 else: if self.size is None: raise ValueError("Cannot reset a backward infinite iterator.") self.position = self.size
[docs] def is_forward(self) -> bool: """ Returns whether the iterator is going forward or not. """ return self.__forward
[docs] def reverse(self): """ Reverse the iteration direction of the iterator, but stay at the same position. Thus, if ``it`` is any :py:class:`TreeIterator`, the following behavior is expected: >>> it.reset() >>> it.reverse() >>> list(it) [] If you want to iterate over an iteration tree ``tree`` backward, you have to reset the iterator after having reversed it: >>> it = iter(tree) >>> it.reverse() >>> it.reset() """ self.__forward = not self.__forward
[docs] def update(self, position: int): """ Update the position of the iterator to any supported position. This includes the positions ranging from ``0`` to ``self.size - 1``, included, as well as - ``-1``, which represents a reset forward iterator and - ``self.size``, which represents a reset backward iterator. If an invalid position is requested, an :py:class:`IndexError` is raised, and the state of the iterator remains unchanged. """ if position < -1: raise IndexError("Cannot update to a position < -1.") try: if self.size is not None and position > self.size: raise IndexError("Cannot update to a position > size.") except InfiniteLength: # There is no upper bound to the indices of an infinite tree pass self.position = position
def __getitem__(self, position: int | DefaultIndex) -> DataTree: """ Return the element at ``position``. If it is a :py:class:`DefaultIndex`, return the default value of the iteration tree, using the :py:attr:`NoDefaultPolicy.FIRST_ELEMENT policy`. Raises: IndexError: if the index is invalid. """ if isinstance(position, DefaultIndex): return self.tree.default(NoDefaultPolicy.FIRST_ELEMENT) size = math.inf if self.size is None else self.size if position < 0: if size < math.inf: assert isinstance(size, int) position = size + position else: raise IndexError( f"Negative index {position} is not supported with infinite " + "tree." ) elif position >= size: raise IndexError(f"Index {position} larger than the tree size.") self.update(position) return self._current_value() @abstractmethod def _current_value(self) -> DataTree: """ Return the value at :py:attr:`position`. Assumptions: - The ownership of the return value is given to the user. - ``self._position`` is a valid value, between ``0`` and ``self.size - 1``. Thus, it should never raise a :py:class:`StopIteration`. """ raise NotImplementedError() def __next__(self) -> DataTree: """ Return the next value in the current iteration direction. """ if self.__forward: self.position += 1 if self.size is not None and self.position >= self.size: self.position = self.size raise StopIteration else: self.position -= 1 if self.position < 0: self.position = -1 raise StopIteration return self._current_value()
[docs] class OneWayTreeIterator: """ Iterator used in cases where random access iteration is too cumbersome to implement. It only requires implementing the :py:meth:`_next` method. Random access is obtained by using a cache. Thus, this method is usually more time and memory expensive than classical random access iterators, so it should only be used as a last resort. For now, the cache size is unbound, as it stores all the iterated values. This might change in the future. This is not a subclass of `TreeIterator` in order to prevent diamond inheritance issues. Child classes must thus inherit from this, alongside :py:class:`TreeIterator`. It is recommended to place :py:meth:`OneWayTreeIterator` first, so that its :py:meth:`_current_value` implementation is used. """ # Attributes defined in `TreeIterator` and used here position: int size: int | None #: Cache of the already generated values. __cache: list[DataTree] #: Last generated position. __last_position: int def __init__(self) -> None: self.__cache = [] self.__last_position = -1 @abstractmethod def _next(self) -> DataTree: """Next iteration value.""" raise NotImplementedError() def _current_value(self) -> DataTree: if self.position >= len(self.__cache): position = self.__last_position while position < self.position: try: value = self._next() self.__cache.append(value) position += 1 except StopIteration as e: tree_size = self.size if self.size is not None else "infinite" raise ValueError( f"Iterator exhausted at position {self.position}, " f"whereas the tree size is {tree_size}." ) from e self.__last_position = position return deepcopy(self.__cache[self.position])
### Indexing ### class _Child(Sentinel): """ Sentinel representing the index of the only child of a unary nodes. """ pass child = _Child() ChildPath = list[Key | _Child] ### Default related objects ###
[docs] class NoDefaultPolicy(Enum): """ Behavior of :py:meth:`IterationTree.default` for trees not having a default value. """ #: Raise a :py:class:`NoDefaultError` if any of the nodes in the tree does #: not have a default value. ERROR = "ERROR" #: Return the :py:class:`_NoDefault` sentinel if any of the nodes in the #: tree does not have a default value. SENTINEL = "SENTINEL" #: Skip elements without a default element. If the root of the tree does #: not have a default value, return a :py:class:`_NoDefault` sentinel. #: #: Note that this is not supported by iteration method nodes with list #: children. SKIP = "SKIP" #: Return the first element of iteration leaves without a default value. FIRST_ELEMENT = "FIRST_ELEMENT"
[docs] class NoDefaultError(Exception): """ Indicates that :py:meth:`IterationTree.default` has been called on an iteration tree where a node does not have a default value. """ #: Path of a child without a default value. path: ChildPath def __init__(self, message: str | None, path: ChildPath) -> None: super().__init__(message) self.path = path @staticmethod def build_from(tree: IterationTree) -> NoDefaultError: return NoDefaultError(f"{tree.__class__.__name__} without a default value.", []) def __str__(self): path_msg = ( f" (path {'/'.join(map(str, self.path))})" if len(self.path) > 0 else "" ) return f"{super().__str__()}{path_msg}"
class _NoDefault(Sentinel): """ Sentinel representing a default value which is not set. """ pass #: You can store this value - instead of an actual default value - in instances #: of classes that can have a default value, but don't. no_default = _NoDefault() ### Iteration tree API ### if typing.TYPE_CHECKING: from .node import UnaryNode
[docs] class InfiniteLength(BaseException): """ Exception raised when requesting the length of an infinite collection. """ pass
[docs] @dataclass(frozen=True) class IterationTree(ABC): """ Represents a set of data trees, as well as the way to iterate over them. In order to be able to get a single data tree from an iteration tree, they are able to build a default data tree, which (usually) has the same shape as the generated data tree. An iteration tree cannot be modified, as it is a frozen :py:class:`dataclass`. Instead, a new one must be created from this one. """ #: Names of the available configurations. configurations: frozenset[Key] = field( default_factory=frozenset, init=False, repr=False )
[docs] def get_configuration(self, key: Key) -> IterationTree: """ Returns a given configuration of the tree, if it exists. Otherwise, raises a :py:class:`KeyError`. See :py:class:`~phileas.iteration.Configurations` node for more details on the behavior of this method. """ if key not in self.configurations: raise KeyError(f"Configuration {key} does not exist.") return self._get_configuration(key)
@abstractmethod def _get_configuration(self, config_name: Key) -> IterationTree: """ Actual implementation of the configuration access in sub-classes. ``config_name`` is assumed to be a valid configuration of the tree, so recursive calls either return an actual configuration, or the unmodified tree. """ raise NotImplementedError()
[docs] def unroll_configurations(self) -> IterationTree: """ Transforms a configurable tree, *ie*. one with ``configurations != {}``, to a non-configurable tree. It requests all the available configurations, and gather them into a :py:class:`~phileas.iteration.Union` node with ``reset=False``. A :py:class:`~phileas.iteration.MoveUpTransform` is used to get rid of the name of the configurations. If the tree is not configurable, return it. """ if len(self.configurations) == 0: return self from phileas.iteration import Union from phileas.iteration.node import MoveUpTransform children = {name: self.get_configuration(name) for name in self.configurations} configurations = Union( children, lazy=True, preset=None, common_preset=False, reset=None ) return MoveUpTransform(configurations)
def __iter__(self) -> TreeIterator: """ Return a two-way resetable tree iterator. """ if len(self.configurations) == 0: return self._iter() logger.warning( "Iterating through a configurable tree. It is automatically " "unrolled, but it is advised to explicitly call " "IterationTree.unroll_configurations before iteration." ) return iter(self.unroll_configurations()) @abstractmethod def _iter(self) -> TreeIterator: """ Implementation of :py:meth:`__iter__` which assumes that ``self`` does not have any configuration. """ raise NotImplementedError() def __len__(self) -> int: """ Return the number of data trees represented by the iteration tree. If it is finite, it is the same as the number of elements yielded by :py:meth:`__iter__`. Otherwise, an :py:class:`InfiniteLength` is raised. See :py:meth:`safe_len` for a method that does not raise errors. """ if len(self.configurations) == 0: return self._len() return sum( len(self.get_configuration(config)) for config in self.configurations )
[docs] def safe_len(self) -> int | None: """ Return the number of data trees represented by the iteration tree. If it is finite, it is the same as the number of elements yielded by :py:meth:`__iter__`. Otherwise, return ``None``. """ try: return len(self) except InfiniteLength: return None
@abstractmethod def _len(self) -> int: """ Implementation of :py:meth:`__len__` which assumes that ``self`` does not have any configuration. """ raise NotImplementedError()
[docs] def iterate(self) -> TreeIterator: """ Implementation of :py:meth:`__len__` which assumes that ``self`` does not have any configuration. """ raise NotImplementedError()
[docs] @abstractmethod def to_pseudo_data_tree(self) -> PseudoDataTree: """ Converts the iteration tree to a pseudo data tree. """ raise NotImplementedError()
[docs] def default( self, no_default_policy: NoDefaultPolicy = NoDefaultPolicy.ERROR ) -> DataTree | _NoDefault: """ Returns a default data tree. If the tree does not have a default value, follows the behavior dictated by ``no_default_policy``. """ try: return self._default(no_default_policy) except NoDefaultError: if no_default_policy == NoDefaultPolicy.ERROR: raise elif no_default_policy == NoDefaultPolicy.SENTINEL: return no_default elif no_default_policy == NoDefaultPolicy.FIRST_ELEMENT: return next(iter(self)) elif no_default_policy == NoDefaultPolicy.SKIP: return no_default else: assert_never(no_default_policy)
@abstractmethod def _default(self, no_default_policy: NoDefaultPolicy) -> DataTree | _NoDefault: """ Concrete implementation of :py:meth:`default`. It returns the default value, or raises a :py:class:`NoDefaultError` if it is not defined. """ raise NotImplementedError() def __getitem__(self, key: Key) -> IterationTree: """ It is implemented to allow working with an iteration tree as if it consisted of nested list and dict objects. """ return self.get([key]) # Path API # # Internal nodes and the structure of iteration trees are to be modified # using a path-based API. # # After using a modification function, only the output of the function # should be used, and `self` should be discarded.
[docs] def with_params(self: Self, path: ChildPath | None = None, **kwargs) -> Self: """ Returns a similar iteration tree, where the node at ``path`` is assigned the given keyword parameters. If ``path`` is not specified, modifies the root of the tree directly. """ valid_path = [] if path is not None: valid_path = path def modifier(tree: IterationTree, path: ChildPath) -> IterationTree: if path == valid_path: return dataclasses.replace(tree, **kwargs) return tree modified_tree = self.depth_first_modify(modifier) assert isinstance(modified_tree, type(self)) return modified_tree
[docs] def get(self, path: ChildPath) -> IterationTree: """ Get a node inside a tree. It should not be used to modify the tree. """ current = self for key in path: current = current._get(key) return current
@abstractmethod def _get(self, child_key: Key | _Child) -> IterationTree: """ Returns the root child with the given key, or raises a `KeyError` if there is no child with this key. The output might be modified by the user. """ raise NotImplementedError() @abstractmethod def _insert_child( self, child_key: Key | _Child, child: IterationTree ) -> IterationTree: """ Insert a new child tree to the root and return the newly created tree. The initial object should not be modified. It is preferable to work with a copy. """ raise NotImplementedError() @abstractmethod def _remove_child(self, child_key: Key | _Child) -> IterationTree: """ Remove a child of the root, and return the newly created tree. Raises a :py:class:`KeyError` if there is no child with this key. The initial object should not be modified. It is preferable to work with a copy. """ raise NotImplementedError() @abstractmethod def _replace_root( self, Node: type[IterationTree], *args, **kwargs ) -> IterationTree: """ Change the root node of a tree, keeping the remaining of the tree unmodified. In order to keep a valid tree structure, `Node` must have the same type as the current root, otherwise a :py:class:`TypeError` is raised. The initial object should not be modified. It is preferable to work with a copy. """ raise NotImplementedError()
[docs] def insert_child( self, path: ChildPath, tree: IterationTree | None ) -> IterationTree: """ Insert a child anywhere in the tree, whose location is specified by path argument. Return the newly created tree. If there is already a node at this location, it will be replaced. If the specified tree is ``None``, a node is supposed to exist at this location (otherwise, a `KeyError` is raised), and will be removed if possible. Only iteration methods nodes will support child removal, see their implementation of :py:meth:`_remove_child`. In any other case, a :py:class:`TypeError` is raised. Note that the root of a tree cannot be removed, so specifying an empty path with a ``None`` tree will raise a :py:class:`KeyError`. """ if tree is None and len(path) == 1: return self._remove_child(path[0]) if len(path) == 0: if tree is None: raise KeyError("Cannot remove the root of an iteration tree.") return tree else: key = path[0] new_child = tree if len(path) > 1: new_child = self._get(key).insert_child(path[1:], tree) assert new_child is not None return self._insert_child(key, new_child)
[docs] def remove_child(self, path: ChildPath) -> IterationTree: """ Remove a node in a tree. It is equivalent to ``insert_child(path, None)``. """ return self.insert_child(path, None)
[docs] def insert_unary( self, path: ChildPath, Parent: type[UnaryNode], *args, **kwargs ) -> IterationTree: """ Insert a parent to the node at the given path, parent which is necessarily a transform node, as it will only have a single child. The parent is built using the ``Parent`` class, and the supplied arguments. The newly created tree is returned. """ from .node import UnaryNode if not issubclass(Parent, UnaryNode): raise TypeError("Cannot insert a non-unary node as a parent.") if len(path) == 0: return Parent(self, *args, **kwargs) key = path[0] new_child = self._get(key).insert_unary(path[1:], Parent, *args, **kwargs) new_me = self._insert_child(key, new_child) return new_me
[docs] def replace_node( self, path: ChildPath, Node: type[IterationTree], *args, **kwargs ) -> IterationTree: """ Replace the node at the given path with another one. The other node is built using its type, ``Node``, and the ``args`` and ``kwargs`` arguments. Note that the sub-tree of the replaced node is not modified. This requires ``Node`` to be of the same kind of the node that is being replaced: a transform for a transform, an iteration method for an iteration method, a leaf for a leaf. """ if len(path) == 0: return self._replace_root(Node, *args, **kwargs) else: key = path[0] new_child = self._get(key).replace_node(path[1:], Node, *args, **kwargs) new_me = self._insert_child(key, new_child) return new_me
[docs] def depth_first_modify( self, modifier: Callable[[IterationTree, ChildPath], IterationTree] ) -> IterationTree: """ Using a post-fix depth-first search, replace each ``node`` of the tree, located at ``path``, with ``modifier(node, path)``. """ return self._depth_first_modify(modifier, [])
@abstractmethod def _depth_first_modify( self, modifier: Callable[[IterationTree, ChildPath], IterationTree], path: ChildPath, ) -> IterationTree: """ Recursive implementation of :py:meth:`depth_first_modify`, implemented by the different node types. In order to prevent side-effects, ``modifier`` should not modify its input objects. It should rather returns modified copies. """ raise NotImplementedError()
[docs] @dataclass(frozen=True) class IterationLeaf(IterationTree): def _get(self, child_key: Key | _Child) -> IterationTree: raise TypeError("Iteration leaves do not support indexing.") def _insert_child( self, child_key: Key | _Child, child: IterationTree ) -> IterationTree: raise TypeError("Iteration leaves do not support indexing.") def _remove_child(self, child_key: Key | _Child) -> IterationTree: raise TypeError("Iteration leaves do not support indexing.") def _replace_root( self, Node: type[IterationTree], *args, **kwargs ) -> IterationTree: if not issubclass(Node, IterationLeaf): raise TypeError(f"Cannot replace an iteration leaf with a {Node}") return Node(*args, **kwargs) def _depth_first_modify( self, modifier: Callable[["IterationTree", ChildPath], "IterationTree"], path: ChildPath, ) -> IterationTree: return modifier(self, path) def __getitem__(self, key: Key) -> IterationTree: raise TypeError(f"{self.__class__.__name__} does not support indexing.") def _get_configuration(self, config_name: Key) -> IterationTree: return self