Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
100dc71
feat: register timegpt and lagllama forecasters
Lopa10ko Mar 14, 2024
f352e63
chore: add lagllama lib
Lopa10ko Mar 14, 2024
7aa7770
fix: change RegressionMetricsEnum import package
Lopa10ko Mar 14, 2024
d86a9b1
feat: add chronos forecaster
Lopa10ko Mar 20, 2024
4870615
feat: add device_map for chronos, change hf_model to tiny
Lopa10ko Mar 20, 2024
622394f
fix: restructure forecasters, fix pep
Lopa10ko Mar 25, 2024
194366f
fix: pep
Lopa10ko Mar 25, 2024
21e8ac4
fix: remove specific tqdm and fedot versions
Lopa10ko Mar 26, 2024
07d04fe
fix: remove python version specification
Lopa10ko Mar 26, 2024
b7b10d8
fix: add init to ensure data package enlisting
Lopa10ko Mar 26, 2024
b5ebe9f
feat: change deprecated TimeGPT to NixtlaClient
Lopa10ko Apr 3, 2024
98b8677
fix: move parent module init before imports
Lopa10ko Apr 3, 2024
081bdc4
fix: convert ckpt_path from Path to str
Lopa10ko Apr 3, 2024
2d2641c
feat: add fedot.industrial univariate ts forecasting wrapper
Lopa10ko Apr 19, 2024
baf1b5e
fix: model init and metric tuple init
Lopa10ko May 20, 2024
92c932e
feat: rm composition history, refactor metric extraction
Lopa10ko May 22, 2024
e8aac89
fix: set pandas chained_assignment to warn instead of error
Lopa10ko May 22, 2024
77b5fb0
chore: add new requirements.txt
Lopa10ko Jan 31, 2025
ba90adb
feat: update chronos forecaster
Lopa10ko Jan 31, 2025
2cf0d9f
feat: update fedot_industrial forecaster
Lopa10ko Jan 31, 2025
d52e52e
feat: move imports under the try except block
Lopa10ko Jan 31, 2025
e35d877
hotfix: add dask client shutdown, set project dir
Lopa10ko Feb 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions experiments/univariate/benchmark_launch.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import torch

from pytsbe.benchmark import BenchmarkUnivariate
from pytsbe.paths import get_project_path

# Seasonality groups; a separate benchmark configuration file exists for each
seasons = ['Daily', 'Monthly', 'Quarterly', 'Weekly', 'Yearly']


def start_benchmark():
    """ Launch the univariate benchmark for every seasonality configuration.

    For each entry in ``seasons`` a :class:`BenchmarkUnivariate` is created
    with its results directory and YAML configuration resolved against the
    project root, so the script works regardless of the current working
    directory.
    """
    print(f'CUDA is available: {torch.cuda.is_available()}')
    # Resolve the project root once; it is loop-invariant.
    project_path = get_project_path()
    print(project_path)
    for season in seasons:
        pytsbe = BenchmarkUnivariate(working_dir=f'{project_path}/experiments/univariate/benchmark_results_{season}',
                                     config_path=f'{project_path}/experiments/univariate/configuration{season}.yaml')
        pytsbe.run()


Expand Down
2 changes: 1 addition & 1 deletion pytsbe/models/automl_forecasters/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
from fedot.core.repository.metrics_repository import RegressionMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from pytsbe.data.forecast_output import ForecastResults
Expand Down
Empty file.
50 changes: 50 additions & 0 deletions pytsbe/models/chronos/chronos_forecaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import numpy as np
import pandas as pd
import torch

from pytsbe.data.forecast_output import ForecastResults
from pytsbe.models.forecast import Forecaster

try:
from chronos import BaseChronosPipeline
except ImportError:
print('Try installing Chronos via pip install '
'git+https://github.com/amazon-science/chronos-forecasting.git')


class ChronosForecaster(Forecaster):
    """
    Time series forecasting with Chronos pretrained models.

    The pipeline is loaded once at construction time from the HuggingFace hub
    (``hf_model`` param, default ``amazon/chronos-t5-tiny``) and reused for
    every prediction; Chronos is zero-shot, so no per-series fitting happens.
    Source code: https://github.com/amazon-science/chronos-forecasting
    """

    def __init__(self, **params):
        super().__init__(**params)
        # Column of the input DataFrame that holds the series values
        self.target = 'value'
        self.forecaster = self.__load_pretrained_pipeline(params.get('hf_model', 'amazon/chronos-t5-tiny'))

    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        # Zero-shot pretrained model: nothing to fit per series.
        pass

    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
                            predictors_columns: list, **kwargs):
        raise NotImplementedError('Chronos does not support fit for multivariate time series forecasting')

    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """ Forecast ``forecast_horizon`` future points for a single series.

        Chronos returns a batch of sampled trajectories; the point forecast is
        the element-wise median over those samples.
        """
        forecast = self.forecaster.predict(
            torch.tensor(historical_values[self.target].values),
            prediction_length=forecast_horizon
        )
        return ForecastResults(predictions=np.median(forecast[0].numpy(), axis=0))

    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
                                predictors_columns: list, **kwargs):
        raise NotImplementedError('Chronos does not support predict for multivariate time series forecasting')

    @staticmethod
    def __load_pretrained_pipeline(hf_model: str) -> 'BaseChronosPipeline':
        # The annotation is quoted (a forward reference) so the class can still
        # be defined when the optional `chronos` dependency is missing — the
        # guarded import above only prints a hint instead of raising, so an
        # unquoted annotation would raise NameError at class-definition time.
        return BaseChronosPipeline.from_pretrained(
            hf_model,
            torch_dtype=torch.bfloat16,
            device_map="cuda" if torch.cuda.is_available() else "cpu"
        )
98 changes: 98 additions & 0 deletions pytsbe/models/fedot_industrial_forecaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import pandas as pd
import numpy as np
import shutil

try:
    from fedot_ind.api.main import FedotIndustrial
    from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG
except ImportError:
    # Optional dependency: keep the module importable; the forecaster below
    # will fail at use time if Fedot.Industrial is actually needed.
    print('Fedot.Industrial library not found. Continuing...')


from pytsbe.data.forecast_output import ForecastResults
from pytsbe.models.forecast import Forecaster

import logging
logging.raiseExceptions = False


class FedotIndustrialForecaster(Forecaster):
    """
    Time series forecasting with the FEDOT.Industrial AutoML framework.
    Source code: https://github.com/aimclub/Fedot.Industrial
    """

    def __init__(self, **params):
        super().__init__(**params)
        # User-supplied params override the defaults key by key.
        self.init_params = {**init_default_config(), **params}
        self.obtained_model = None

    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """ Train FEDOT.Industrial framework (launch AutoML algorithm) """
        train_data = prepare_input_ts_data(historical_values, forecast_horizon, is_for_forecast=False)

        # Propagate the requested horizon into both config sections.
        horizon_params = {'forecast_length': forecast_horizon}
        self.init_params['industrial_config']['task_params'] = horizon_params
        self.init_params['automl_config']['task_params'] = horizon_params

        industrial_model = FedotIndustrial(**self.init_params)
        industrial_model.fit(train_data)
        self.obtained_model = industrial_model
        # Release the dask client spawned during the AutoML search.
        self.obtained_model.shutdown()
        # TODO: composition-history cleanup should become Fedot.Industrial's responsibility

    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
                            target_column: str, predictors_columns: list, **kwargs):
        """ Create pipeline for multivariate time series forecasting """
        raise NotImplementedError()

    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """ Use obtained pipeline to make predictions """
        forecast_data = prepare_input_ts_data(historical_values, forecast_horizon, is_for_forecast=True)
        return ForecastResults(predictions=self.obtained_model.predict(forecast_data))

    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
                                target_column: str, predictors_columns: list, **kwargs):
        raise NotImplementedError()


def prepare_input_ts_data(historical_values: pd.DataFrame, forecast_horizon: int, is_for_forecast: bool):
    """ Convert a history DataFrame into a (features, target) pair of 1D arrays.

    For prediction (``is_for_forecast=True``) the whole series is returned
    twice; for training the target is the last ``forecast_horizon`` values.
    """
    series = np.array(historical_values['value']).flatten()
    target = series if is_for_forecast else series[-forecast_horizon:]
    return series, target


def init_default_config():
    """ Build the default FedotIndustrial API configuration.

    Returns a fresh dict on every call.  ``DEFAULT_COMPUTE_CONFIG`` is copied
    rather than mutated in place: the original code assigned into the imported
    dict itself, silently changing the library-level default shared by every
    other importer of ``config_repository``.
    """
    # Copy the library default; only the 'distributed' section is overridden.
    compute_config = {**DEFAULT_COMPUTE_CONFIG,
                      'distributed': dict(processes=False,
                                          n_workers=2,
                                          threads_per_worker=2,
                                          memory_limit=0.3)}
    automl_config = {'task': 'ts_forecasting',
                     'task_params': {'forecast_length': 1},
                     'use_automl': True,
                     'optimisation_strategy': {'optimisation_strategy':
                                               {'mutation_agent': 'random',
                                                'mutation_strategy': 'growth_mutation_strategy'},
                                               'optimisation_agent': 'Industrial'}}
    # timeout presumably in minutes — TODO confirm against Fedot.Industrial docs
    automl_learning_strategy = dict(timeout=5,
                                    n_jobs=4,
                                    pop_size=10,
                                    with_tuning=False,
                                    logging_level=20)  # 20 == logging.INFO
    learning_config = {'learning_strategy': 'from_scratch',
                       'learning_strategy_params': automl_learning_strategy,
                       'optimisation_loss': {'quality_loss': 'rmse'}}
    # forecast_length placeholders are overwritten per-fit by the forecaster.
    industrial_config = {'problem': 'ts_forecasting',
                         'task_params': {'forecast_length': 1}}

    return {'industrial_config': industrial_config,
            'automl_config': automl_config,
            'learning_config': learning_config,
            'compute_config': compute_config}
Empty file.
Empty file.
Empty file.
Empty file.
40 changes: 40 additions & 0 deletions pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import torch


@torch.no_grad()
def freq_mask(x, y, rate=0.1, dim=1):
# Get lengths of the input tensors along the specified dimension.
x_len = x.shape[dim]
y_len = y.shape[dim]

# Concatenate x and y along the specified dimension.
# x and y represent past and future targets respectively.
xy = torch.cat([x, y], dim=dim)

# Perform a real-valued fast Fourier transform (RFFT) on the concatenated tensor.
# This transforms the time series data into the frequency domain.
xy_f = torch.fft.rfft(xy, dim=dim)

# Create a random mask with a probability defined by 'rate'.
# This mask will be used to randomly select frequencies to be zeroed out.
m = torch.rand_like(xy_f, dtype=xy.dtype) < rate

# Apply the mask to the real and imaginary parts of the frequency data,
# setting the selected frequencies to zero. This 'masks' those frequencies.
freal = xy_f.real.masked_fill(m, 0)
fimag = xy_f.imag.masked_fill(m, 0)

# Combine the masked real and imaginary parts back into complex frequency data.
xy_f = torch.complex(freal, fimag)

# Perform an inverse RFFT to transform the data back to the time domain.
# The masked frequencies will affect the reconstructed time series.
xy = torch.fft.irfft(xy_f, dim=dim)

# If the reconstructed data length differs from the original concatenated length,
# adjust it to maintain consistency. This step ensures the output shape matches the input.
if x_len + y_len != xy.shape[dim]:
xy = torch.cat([x[:, 0:1, ...], xy], 1)

# Split the reconstructed data back into two parts corresponding to the original x and y.
return torch.split(xy, [x_len, y_len], dim=dim)
63 changes: 63 additions & 0 deletions pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import torch


@torch.no_grad()
def freq_mix(x, y, rate=0.1, dim=1):
    """Frequency-mixing augmentation for (past, future) target pairs.

    A random subset of FFT coefficients of each concatenated series is
    replaced with the corresponding coefficients of a batch-shuffled copy,
    producing augmented series that blend spectra from different samples.
    Uses both the torch RNG (for the bin mask) and numpy RNG (for the
    batch shuffle).

    Args:
        x: past targets, batch-first tensor.
        y: future targets, same batch size as ``x``.
        rate: probability a frequency bin is taken from the shuffled copy.
        dim: time dimension along which the FFT runs (the length-fixup branch
            below hardcodes dim 1 — TODO confirm for other dims).

    Returns:
        Tuple ``(x_aug, y_aug)`` with the original lengths along ``dim``.
    """
    # Lengths of the past and future pieces along the time dimension.
    x_len = x.shape[dim]
    y_len = y.shape[dim]

    # Concatenate past and future so both are augmented consistently.
    xy = torch.cat([x, y], dim=dim)

    # Real-valued FFT: full trajectory into the frequency domain.
    xy_f = torch.fft.rfft(xy, dim=dim)

    # Bernoulli(rate) selection mask over frequency bins.
    m = torch.rand_like(xy_f, dtype=xy.dtype) < rate

    # Amplitude of each frequency component.
    amp = abs(xy_f)

    # `index` holds the original bin positions sorted by descending amplitude.
    _, index = amp.sort(dim=dim, descending=True)
    # NOTE(review): this compares the *sorted bin positions* element-wise with
    # the random mask, so it is not a straightforward "skip the top-3 bins in
    # place" filter; kept as in the upstream lag-llama implementation —
    # confirm intent before altering.
    dominant_mask = index > 2
    m = torch.bitwise_and(m, dominant_mask)

    # Zero the selected bins in the original spectrum.
    freal = xy_f.real.masked_fill(m, 0)
    fimag = xy_f.imag.masked_fill(m, 0)

    # Batch-shuffled counterpart: selected bins will be filled from other
    # (randomly paired) sequences in the batch.
    b_idx = np.arange(x.shape[0])
    np.random.shuffle(b_idx)
    x2, y2 = x[b_idx], y[b_idx]

    # Spectrum of the shuffled trajectories.
    xy2 = torch.cat([x2, y2], dim=dim)
    xy2_f = torch.fft.rfft(xy2, dim=dim)

    # Complementary mask: keep in the shuffled spectrum exactly the bins that
    # were zeroed in the original one.
    m = torch.bitwise_not(m)
    freal2 = xy2_f.real.masked_fill(m, 0)
    fimag2 = xy2_f.imag.masked_fill(m, 0)

    # Merge: original bins + shuffled bins form the mixed spectrum.
    freal += freal2
    fimag += fimag2

    # Rebuild the complex spectrum and return to the time domain.
    xy_f = torch.complex(freal, fimag)
    xy = torch.fft.irfft(xy_f, dim=dim)

    # irfft returns an even length (one sample fewer for odd-length input);
    # pad with the first step of x to restore the original total length.
    if x_len + y_len != xy.shape[dim]:
        xy = torch.cat([x[:, 0:1, ...], xy], 1)

    # Split back into pieces matching the original x and y lengths.
    return torch.split(xy, [x_len, y_len], dim=dim)
Loading