Source code for phileas.parsing

"""
This module allows to parse configuration files, which use a modified YAML
syntax, to data and iteration trees, as defined in the
:py:mod:`~phileas.iteration` module. In particular, it defines the
supported custom YAML types.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from math import ceil
from pathlib import Path
from types import NoneType
from typing import Any, ClassVar, Generic, Literal, TypeVar

import numpy as np
from ruamel import yaml
from ruamel.yaml import YAML

from phileas import iteration
from phileas.iteration import Key, NumpyRNG

# Warning: using the safe loader disables calling __post_init__ in dataclasses,
# which effectively skips data verification in some custom YAML types. Using
# the round-trip loader (ie. not specifying ``typ``) solve this issue, but
# changes the signature of ``construct_mapping``.
_data_tree_parser = YAML(typ="safe")
_iteration_tree_parser = YAML(typ="safe")


[docs] def load_data_tree_from_yaml_file(file: Path | str) -> iteration.DataTree: """ Parses a YAML configuration file (from its path or its content) into a data tree. """ return _data_tree_parser.load(file)
[docs] def load_iteration_tree_from_yaml_file(file: Path | str) -> iteration.IterationTree: """ Parses a YAML configuration file (from its path or its content) into an iteration tree. Iteration will be based on cartesian products, and iteration leaves can be specified with custom YAML types. """ raw_data = _iteration_tree_parser.load(file) return raw_yaml_structure_to_iteration_tree(raw_data)
### Custom YAML types ###
[docs] class YamlCustomType(ABC): @abstractmethod def to_iteration_tree(self) -> iteration.IterationTree: raise NotImplementedError()
[docs] @_iteration_tree_parser.register_class @dataclass class Configurations(YamlCustomType): """ Configurations container. It holds a dictionary of configurations, and has three optional arguments: ``_default``, ``_move_up`` and ``_insert_name``. ``_default`` specifies the name of the default configuration. ``_move_up`` is a boolean defaulting to ``False``. If it is set, the content of the chosen configuration will be moved one level up. ``_insert_name`` is a boolean defaulting to ``False``. If it is set, the name of the chosen configuration will be inserted in the final :py:attr`~phileas.iteration.base.DataTree``. If ``move_up``, then it is assigned to the key that previously hold the ``!configurations`` node. Otherwise, it is inserted under the name ``"_configuration"``. See :py:class:`~phileas.iteration.Configurations` for more details on the impact of ``_move_up`` and ``_insert_name``. """ yaml_tag: ClassVar[str] = "!configurations" configurations: dict[Key, Any] default: Key move_up: bool insert_name: bool @classmethod def from_yaml( cls, constructor: yaml.Constructor, node: yaml.Node ) -> Configurations: if isinstance(node, yaml.MappingNode): mapping = constructor.construct_mapping(node, deep=True) default = mapping.pop("_default", None) move_up = mapping.pop("_move_up", False) insert_name = mapping.pop("_insert_name", False) return Configurations( configurations=mapping, default=default, move_up=move_up, insert_name=insert_name, ) else: raise TypeError("Configurations must be stored in a named map.") def to_iteration_tree(self) -> iteration.IterationTree: return iteration.Configurations( { name: raw_yaml_structure_to_iteration_tree(config) for name, config in self.configurations.items() }, default_configuration=self.default, move_up=self.move_up, insert_name=self.insert_name, )
[docs] @_iteration_tree_parser.register_class @dataclass class CartesianProduct(YamlCustomType): """ Cartesian product node, see :py:class:`~phileas.iteration.CartesianProduct` for the supported arguments. """ yaml_tag: ClassVar[str] = "!product" children: dict[Key, Any] | list order: list[Key] | None lazy: bool snake: bool @classmethod def from_yaml( cls, constructor: yaml.Constructor, node: yaml.Node ) -> CartesianProduct: if isinstance(node, yaml.MappingNode): mapping = constructor.construct_mapping(node, deep=True) order = mapping.pop("_order", None) lazy = mapping.pop("_lazy", False) snake = mapping.pop("_snake", False) return CartesianProduct( children=mapping, order=order, lazy=lazy, snake=snake, ) else: children = constructor.construct_sequence(node, deep=True) return CartesianProduct(children, order=None, lazy=False, snake=False) def to_iteration_tree(self) -> iteration.IterationTree: children: dict[Key, iteration.IterationTree] | list[iteration.IterationTree] if isinstance(self.children, list): children = [ raw_yaml_structure_to_iteration_tree(child) for child in self.children ] else: assert isinstance(self.children, dict) children = { name: raw_yaml_structure_to_iteration_tree(child) for name, child in self.children.items() } return iteration.CartesianProduct( children, order=self.order, lazy=self.lazy, snake=self.snake, )
[docs] @_iteration_tree_parser.register_class @dataclass class Union(YamlCustomType): """ Union node, see :py:class:`~phileas.iteration.Union` for the supported arguments. """ yaml_tag: ClassVar[str] = "!union" children: dict[Key, Any] | list order: list[Key] | None lazy: bool preset: Literal["first"] | Literal["default"] | None common_preset: bool reset: Literal["first"] | Literal["last"] | Literal["default"] | None @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> Union: if isinstance(node, yaml.MappingNode): mapping = constructor.construct_mapping(node, deep=True) order = mapping.pop("_order", None) lazy = mapping.pop("_lazy", False) preset = mapping.pop("_preset", "first") common_preset = mapping.pop("_common_preset", False) reset = mapping.pop("_reset", "first") return Union( children=mapping, order=order, lazy=lazy, preset=preset, common_preset=common_preset, reset=reset, ) else: children = constructor.construct_sequence(node, deep=True) return Union( children, order=None, lazy=False, preset="first", common_preset=False, reset="first", ) def to_iteration_tree(self) -> iteration.IterationTree: children: dict[Key, iteration.IterationTree] | list[iteration.IterationTree] if isinstance(self.children, list): children = [ raw_yaml_structure_to_iteration_tree(child) for child in self.children ] else: assert isinstance(self.children, dict) children = { name: raw_yaml_structure_to_iteration_tree(child) for name, child in self.children.items() } return iteration.Union( children, order=self.order, lazy=self.lazy, preset=self.preset, common_preset=self.common_preset, reset=self.reset, )
[docs] @_iteration_tree_parser.register_class @dataclass class Zip(YamlCustomType): """ :py:class:`~phileas.iteration.Zip` node, whose iteration behaves like :py:func:`zip`. """ yaml_tag: ClassVar[str] = "!zip" children: dict[Key, Any] | list order: list[Key] | None lazy: bool stops_at: Literal["shortest"] | Literal["longest"] ignore_fixed: bool @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> Zip: if isinstance(node, yaml.MappingNode): mapping = constructor.construct_mapping(node, deep=True) order = mapping.pop("_order", None) lazy = mapping.pop("_lazy", False) stops_at = mapping.pop("_stops_at", "shortest") ignore_fixed = mapping.pop("_ignore_fixed", True) return Zip( children=mapping, order=order, lazy=lazy, stops_at=stops_at, ignore_fixed=ignore_fixed, ) else: children = constructor.construct_sequence(node, deep=True) return Zip( children, order=None, lazy=False, stops_at="shortest", ignore_fixed=True ) def to_iteration_tree(self) -> iteration.IterationTree: children: dict[Key, iteration.IterationTree] | list[iteration.IterationTree] if isinstance(self.children, list): children = [ raw_yaml_structure_to_iteration_tree(child) for child in self.children ] else: assert isinstance(self.children, dict) children = { name: raw_yaml_structure_to_iteration_tree(child) for name, child in self.children.items() } return iteration.Zip( children, order=self.order, lazy=self.lazy, stops_at=self.stops_at, ignore_fixed=self.ignore_fixed, )
[docs] @_iteration_tree_parser.register_class @dataclass class Shuffle(YamlCustomType): """ Shuffle node, whose iteration returns a permutation of its only child. It has a single ``child`` field. """ yaml_tag: ClassVar[str] = "!shuffle" child: Any @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> Shuffle: mapping = constructor.construct_mapping(node, deep=True) try: child = mapping.pop("child") except KeyError as e: raise ValueError("!shuffle requires a child field") from e return Shuffle(child) @classmethod def to_yaml(cls, representer: yaml.Representer, node: Shuffle): if isinstance(node.child, dict): return representer.represent_mapping(cls.yaml_tag, node.child) else: return representer.represent_sequence(cls.yaml_tag, node.child) def to_iteration_tree(self) -> iteration.IterationTree: return iteration.Shuffle(raw_yaml_structure_to_iteration_tree(self.child))
[docs] @_iteration_tree_parser.register_class @dataclass class First(YamlCustomType): """ :py:class:`~phileas.iteration.First` node, which only iterates over the first elements of its child. """ yaml_tag: ClassVar[str] = "!first" child: Any size: int | None @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> First: mapping = constructor.construct_mapping(node, deep=True) try: child = mapping.pop("_child") size = mapping.pop("_size", None) except KeyError as e: raise ValueError(f"!first requires a {e.args[0]} field") from e return First(child, size) @classmethod def to_yaml(cls, representer: yaml.Representer, node: First): return representer.represent_mapping( cls.yaml_tag, {"_size": node.size, "_child": node.child} ) def to_iteration_tree(self) -> iteration.IterationTree: return iteration.First( raw_yaml_structure_to_iteration_tree(self.child), self.size )
[docs] @_iteration_tree_parser.register_class @dataclass class Pick(YamlCustomType): """ Pick node, whose iteration alternatively returns a single of its children. It contains a named mapping, with a ``_default_child`` field, containing the key of its default child. """ yaml_tag: ClassVar[str] = "!pick" children: dict[Key, Any] default_child: Key @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> Pick: mapping = constructor.construct_mapping(node, deep=True) default_child = mapping.pop("_default_child", None) return Pick(children=mapping, default_child=default_child) @classmethod def to_yaml(cls, representer: yaml.Representer, node: Pick): return representer.represent_mapping( cls.yaml_tag, node.children | {"_default_child": node.default_child} ) def to_iteration_tree(self) -> iteration.IterationTree: return iteration.Pick( { name: raw_yaml_structure_to_iteration_tree(config) for name, config in self.children.items() }, default_child=self.default_child, )
#: Numeric type of the Range RT = TypeVar("RT", bound=int | float)
[docs] @_iteration_tree_parser.register_class @dataclass class Range(YamlCustomType, Generic[RT]): """ Range of numbers, that can optionally (but usually will) specify an iteration method. It can be converted to an iteration leaf using :py:meth:`to_iteration_tree`. The ``start`` and ``end`` attributes are mandatory, and ``default`` can be optionally specified. ``steps`` or ``resolution`` (but not both) can be specified. If they are, and ``progression`` is not, or equals ``"linear"``, the range will represent - an :py:class:`~phileas.iteration.IntegerRange` if ``start`` and ``end`` are integers and ``resolution`` is used and is an integer; - an :py:class:`~phileas.iteration.LinearRange` otherwise. If ``progression`` is ``geometric``, the :py:class:`Range` will represent an :py:class:`~phileas.iteration.GeometricRange`. If neither ``steps`` nor ``resolution`` is specified, the range will represent an :py:class:`~phileas.iteration.NumericRange`. """ yaml_tag: ClassVar[str] = "!range" start: RT end: RT default: RT | None = None steps: int | None = None resolution: float | int | None = None progression: Literal["linear"] | Literal["geometric"] = "linear" def __post_init__(self): if self.steps is not None and self.resolution is not None: msg = "!range object must have only one of steps and resolution." raise ValueError(msg) if not isinstance(self.steps, (int, NoneType)): raise ValueError("!range steps parameter must be an integer.") if self.steps is not None and self.steps <= 0: raise ValueError("!range steps must be positive.") if self.resolution is not None and self.resolution <= 0: raise ValueError("!range resolution parameter must be positive.") if self.progression not in ("linear", "geometric"): raise ValueError("!range progression must be linear or geometric.") def to_iteration_tree(self) -> iteration.IterationTree: start = self.start end = self.end default: RT | iteration._NoDefault if self.default is None: default = iteration.no_default else: default = self.default def is_int(i: object) -> bool: return isinstance(i, int) if self.steps is None and self.resolution is None: return iteration.NumericRange(start, end, default_value=default) elif ( is_int(start) and is_int(end) and is_int(self.resolution) and (is_int(default) or default is iteration.no_default) ): assert isinstance(start, int) assert isinstance(end, int) assert isinstance(default, int) or default is iteration.no_default assert isinstance(self.resolution, int) return iteration.IntegerRange( start, end, default_value=default, step=self.resolution # type: ignore[arg-type] ) elif self.progression in (None, "linear"): assert isinstance(end, (int, float)) assert isinstance(start, (int, float)) if self.resolution is not None: steps = ceil(abs(end - start) / self.resolution) + 1 # type: ignore[operator] else: assert self.steps is not None steps = self.steps return iteration.LinearRange(start, end, default_value=default, steps=steps) else: if self.resolution is not None: global_ratio = self.end / self.start # type: ignore[operator] if global_ratio < 1: global_ratio = 1 / global_ratio steps = ceil(np.log(global_ratio) / np.log(self.resolution)) + 1 else: assert self.steps is not None steps = self.steps return iteration.GeometricRange( start, end, default_value=default, steps=steps )
[docs] @_iteration_tree_parser.register_class @dataclass class Sequence(YamlCustomType): yaml_tag: ClassVar[str] = "!sequence" elements: list[iteration.DataTree] default: iteration.DataTree | None = None @classmethod def to_yaml(cls, representer: yaml.Representer, node: Sequence): if node.default is None: return representer.represent_sequence(cls.yaml_tag, node.elements) return representer.represent_mapping( cls.yaml_tag, {"elements": node.elements, "default": node.default} ) @classmethod def from_yaml(cls, constructor: yaml.Constructor, node: yaml.Node) -> Sequence: if isinstance(node, yaml.SequenceNode): elements = constructor.construct_sequence(node, deep=True) default = None elif isinstance(node, yaml.MappingNode): mapping = constructor.construct_mapping(node, deep=True) elements = mapping["elements"] if not isinstance(elements, list): raise TypeError("!sequence elements field must be a sequence") default = mapping.get("default", None) else: msg = "!sequence must be a scalar sequence of a mapping with keys " msg += "elements [and default]" raise TypeError(msg) return Sequence(elements=elements, default=default) def to_iteration_tree(self) -> iteration.IterationTree: default: iteration.DataTree | iteration._NoDefault if self.default is None: default = iteration.no_default else: default = self.default return iteration.Sequence(self.elements, default)
[docs] @_iteration_tree_parser.register_class @dataclass class Random(YamlCustomType): yaml_tag: ClassVar[str] = "!random" distribution: str parameters: dict[str, Any] size: int | None = None default: iteration.DataTree | None = None def to_iteration_tree(self) -> iteration.IterationTree: return NumpyRNG( seed=None, size=self.size, default_value=self.default, distribution=getattr(np.random.Generator, self.distribution), kwargs=self.parameters, )
[docs] @_iteration_tree_parser.register_class @dataclass class UniformBigIntegerRng(YamlCustomType): yaml_tag: ClassVar[str] = "!random_uniform_bigint" high: int low: int = 0 size: int | None = None default: iteration.DataTree | None = None def to_iteration_tree(self) -> iteration.IterationTree: return iteration.UniformBigIntegerRng( seed=None, size=self.size, default_value=self.default, low=self.low, high=self.high, )
[docs] @_iteration_tree_parser.register_class @dataclass class PrimeRng(YamlCustomType): yaml_tag: ClassVar[str] = "!random_prime" high: int low: int = 0 size: int | None = None default: iteration.DataTree | None = None def to_iteration_tree(self) -> iteration.IterationTree: return iteration.PrimeRng( seed=None, size=self.size, default_value=self.default, low=self.low, high=self.high, )
### Conversion to iteration tree ### def raw_yaml_structure_to_iteration_tree(structure: Any) -> iteration.IterationTree: if isinstance(structure, list): list_children = list(map(raw_yaml_structure_to_iteration_tree, structure)) return iteration.CartesianProduct(list_children) elif isinstance(structure, dict): dict_children = { key: raw_yaml_structure_to_iteration_tree(value) for key, value in structure.items() } return iteration.CartesianProduct(dict_children) elif isinstance(structure, YamlCustomType): return structure.to_iteration_tree() elif isinstance(structure, (NoneType, bool, str, int, float)): return iteration.IterationLiteral(structure) # type: ignore[type-var] else: raise ValueError(f"Unsupported value {structure}.")