Source code for itwinai.torch.config

"""Default configuration"""

from pydantic import BaseModel


class Configuration(BaseModel, extra='allow'):
    """Base configuration class.

    Fields that are not declared on the model are accepted as well
    (``extra='allow'``). Values can be read either as attributes
    (``cfg.name``) or with subscript syntax (``cfg['name']``).
    """

    def __getitem__(self, idx):
        """Return the configuration value stored under the name ``idx``.

        Args:
            idx: Name of the field to read.

        Returns:
            The value of the field named ``idx``.

        Raises:
            AttributeError: If there is no attribute with that name.
        """
        # Use ``getattr`` rather than calling ``__getattribute__`` directly:
        # a direct call skips the ``__getattr__`` fallback, which is how
        # pydantic v2 resolves extra (undeclared) fields, so ``cfg['extra']``
        # would fail even though ``cfg.extra`` works.
        return getattr(self, idx)
class TrainingConfiguration(Configuration):
    """Default configuration object for training.

    Defaults can be overridden, and new entries added, by passing keyword
    arguments to the constructor.

    Example:

    >>> cfg = TrainingConfiguration(batch_size=2, param_a=42)
    >>> print(cfg.batch_size)  # returns 2 (overrides default)
    >>> print(cfg.param_a)  # returns 42 (new value)
    >>> print(cfg.pin_memory)  # returns the default value
    >>>
    >>> from rich import print
    >>> print(cfg)  # pretty-print of configuration
    """

    #: Batch size. In a distributed environment it is usually the
    #: per-worker batch size. Defaults to 32.
    batch_size: int = 32

    #: Whether to shuffle the train dataset when creating a torch
    #: ``DataLoader``. Defaults to False.
    shuffle_train: bool = False

    #: Whether to shuffle the validation dataset when creating a torch
    #: ``DataLoader``. Defaults to False.
    shuffle_validation: bool = False

    #: Whether to shuffle the test dataset when creating a torch
    #: ``DataLoader``. Defaults to False.
    shuffle_test: bool = False

    #: Whether to pin GPU memory. Property of torch ``DataLoader``.
    #: Defaults to False.
    pin_memory: bool = False

    #: Number of parallel workers used by torch ``DataLoader``.
    #: Defaults to 4.
    num_workers: int = 4

    #: Learning rate used by the optimizer. Defaults to 1e-3.
    lr: float = 1e-3

    #: Momentum used by some optimizers (e.g., SGD). Defaults to 0.9.
    momentum: float = 0.9

    #: Parameter of Horovod's ``DistributedOptimizer``: uses float16
    #: operations in the allreduce distributed gradients aggregation.
    #: Better performances at lower precision. Defaults to False.
    fp16_allreduce: bool = False

    #: Parameter of Horovod's ``DistributedOptimizer``: use Adasum
    #: optimization. Defaults to False.
    use_adasum: bool = False

    #: Parameter of Horovod's ``DistributedOptimizer``: scale gradients
    #: before adding them up. Defaults to 1.0.
    gradient_predivide_factor: float = 1.0