Source code for itwinai.torch.config

"""Default configuration"""

from typing import Literal

from pydantic import BaseModel



[docs]
class Configuration(BaseModel, extra='allow'):
    """Base configuration class.

    The model is declared with ``extra='allow'``, so keyword arguments that
    do not match a declared field are kept on the instance instead of being
    rejected. Values can be read with attribute access (``cfg.name``) or
    with subscript access (``cfg['name']``).
    """

    def __getitem__(self, idx: str):
        """Return the configuration value stored under the name ``idx``.

        Args:
            idx: name of the field (declared or extra) to read.

        Returns:
            The value of the requested field.

        Raises:
            AttributeError: if no field named ``idx`` exists.
        """
        # Use ``getattr`` rather than calling ``__getattribute__`` directly:
        # a direct ``__getattribute__`` call skips the ``__getattr__``
        # fallback, which is the hook pydantic v2 uses to serve fields
        # accepted through ``extra='allow'``. With ``getattr`` subscript
        # access behaves exactly like attribute access.
        return getattr(self, idx)




[docs]
class TrainingConfiguration(Configuration):
    """Default configuration object for training.
    Override and/or create new configurations using the constructor.

    Example:

    >>> cfg = TrainingConfiguration(batch_size=17, param_a=42)
    >>> print(cfg.batch_size)      # prints 17 (overrides default)
    >>> print(cfg.param_a)         # prints 42 (new value)
    >>> print(cfg.pin_gpu_memory)  # prints the default value (False)
    >>>
    >>> from rich import print
    >>> print(cfg)                 # pretty-print of configuration

    """
    #: Batch size. In a distributed environment it is usually the
    #: per-worker batch size. Defaults to 32.
    batch_size: int = 32
    #: Whether to shuffle train dataset when creating a torch ``DataLoader``.
    #: Defaults to False.
    shuffle_train: bool = False
    #: Whether to shuffle validation dataset when creating a torch
    #: ``DataLoader``.
    #: Defaults to False.
    shuffle_validation: bool = False
    #: Whether to shuffle test dataset when creating a torch ``DataLoader``.
    #: Defaults to False.
    shuffle_test: bool = False
    #: Whether to pin GPU memory. Property of torch ``DataLoader``.
    #: Defaults to False.
    pin_gpu_memory: bool = False
    #: Number of parallel workers used by torch ``DataLoader``.
    #: Defaults to 4.
    num_workers_dataloader: int = 4
    #: Loss function. One of 'mse', 'nllloss' or 'cross_entropy'.
    #: Defaults to 'cross_entropy'.
    loss: Literal['mse', 'nllloss', 'cross_entropy'] = 'cross_entropy'
    #: Name of the optimizer to use. One of 'adadelta', 'adam', 'rmsprop'
    #: or 'sgd'. Defaults to 'adam'.
    optimizer: Literal['adadelta', 'adam', 'rmsprop', 'sgd'] = 'adam'
    #: Learning rate used by the optimizer. Defaults to 1e-3.
    optim_lr: float = 1e-3
    #: Momentum used by some optimizers (e.g., SGD). Defaults to 0.9.
    optim_momentum: float = 0.9
    #: Weight decay parameter for the optimizer. Defaults to 0.
    optim_weight_decay: float = 0.0
    #: Parameter of Horovod's ``DistributedOptimizer``: uses float16
    #: operations in the allreduce
    #: distributed gradients aggregation. Better performances at
    #: lower precision. Defaults to False.
    fp16_allreduce: bool = False
    #: Parameter of Horovod's ``DistributedOptimizer``: use Adasum
    #: optimization.
    #: Defaults to False.
    use_adasum: bool = False
    #: Parameter of Horovod's ``DistributedOptimizer``: scale
    #: gradients before adding them up.
    #: Defaults to 1.0.
    gradient_predivide_factor: float = 1.0