Source code for ltsm.data_provider.tokenizer.standard_scaler

import os
import pickle
import numpy as np
from sklearn.preprocessing import StandardScaler as SKStandardScaler

from ltsm.common.base_processor import BaseProcessor
from typing import Tuple, List


class StandardScaler(BaseProcessor):
    """
    Represents a Standard Scaler object that uses Sklearn's Standard Scaler for data processing.

    Attributes:
        module_id (str): The identifier for base processor objects.
    """
    module_id = "standard_scaler"

    def __init__(self):
        self._scaler = None
    def process(
        self,
        raw_data: np.ndarray,
        train_data: List[np.ndarray],
        val_data: List[np.ndarray],
        test_data: List[np.ndarray],
        fit_train_only: bool = False,
        do_anomaly: bool = False,
    ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
        """
        Standardizes the training, validation, and test sets by removing the mean and scaling to unit variance.

        Args:
            raw_data (np.ndarray): The raw data.
            train_data (List[np.ndarray]): The list of training sequences.
            val_data (List[np.ndarray]): The list of validation sequences.
            test_data (List[np.ndarray]): The list of test sequences.
            fit_train_only (bool): If True, the scaler is fitted on the training sequence only;
                otherwise it is fitted on the full raw sequence.
            do_anomaly (bool): If True, the last sequence is treated as anomaly labels and is
                passed through unscaled.

        Returns:
            Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: A tuple of three lists
            containing the processed training, validation, and test data.
        """
        scaled_train_data, scaled_val_data, scaled_test_data = [], [], []
        for i, (raw_sequence, train_sequence, val_sequence, test_sequence) in enumerate(zip(
            raw_data,
            train_data,
            val_data,
            test_data,
        )):
            if do_anomaly and i == len(raw_data) - 1:
                # Skip the anomaly-label sequence: labels must stay unscaled
                scaled_train_data.append(train_sequence)
                scaled_val_data.append(val_sequence)
                scaled_test_data.append(test_sequence)
                continue

            # Sklearn's StandardScaler expects 2D input of shape (n_samples, n_features)
            train_sequence = train_sequence.reshape(-1, 1)
            val_sequence = val_sequence.reshape(-1, 1)
            test_sequence = test_sequence.reshape(-1, 1)

            # A fresh scaler is fitted for every sequence (channel)
            self._scaler = SKStandardScaler()
            if fit_train_only:
                self._scaler.fit(train_sequence)
            else:
                self._scaler.fit(raw_sequence.reshape(-1, 1))

            scaled_train_data.append(self._scaler.transform(train_sequence).flatten())
            scaled_val_data.append(self._scaler.transform(val_sequence).flatten())
            scaled_test_data.append(self._scaler.transform(test_sequence).flatten())

        return scaled_train_data, scaled_val_data, scaled_test_data
    def inverse_process(self, data: np.ndarray) -> np.ndarray:
        """
        Scales the data back to its original representation.

        Args:
            data (np.ndarray): The data to scale back.

        Returns:
            np.ndarray: The scaled-back data.
        """
        assert self._scaler is not None, "StandardScaler has not been fitted"
        raw_shape = data.shape
        data = self._scaler.inverse_transform(data.reshape(-1, 1))
        return data.reshape(raw_shape)
    def save(self, save_dir: str):
        """
        Saves the scaler to the save_dir directory as a Pickle file named processor.pkl.

        Args:
            save_dir (str): The directory in which to store the scaler.
        """
        save_path = os.path.join(save_dir, "processor.pkl")
        with open(save_path, 'wb') as f:
            pickle.dump(self._scaler, f)
    def load(self, save_dir: str):
        """
        Loads the scaler saved in the save_dir directory.

        Args:
            save_dir (str): The directory from which to load the scaler.
        """
        save_path = os.path.join(save_dir, "processor.pkl")
        with open(save_path, 'rb') as f:
            self._scaler = pickle.load(f)
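
A minimal usage sketch of process and inverse_process. The toy series, the 60/20/20 split, and the variable names below are illustrative assumptions, not part of the module:

import numpy as np

# Hypothetical data: two univariate toy series, split 60/20/20
# into train/val/test.
raw_data = np.array([
    np.arange(10, dtype=float),        # series 0: 0..9
    np.arange(100, 110, dtype=float),  # series 1: 100..109
])
train_data = [seq[:6] for seq in raw_data]
val_data = [seq[6:8] for seq in raw_data]
test_data = [seq[8:] for seq in raw_data]

processor = StandardScaler()
train_s, val_s, test_s = processor.process(
    raw_data, train_data, val_data, test_data, fit_train_only=True
)

# Each returned sequence is a flat, standardized array. Because process()
# refits a fresh scaler per sequence, inverse_process uses the parameters
# of the last sequence fitted (series 1 here).
restored = processor.inverse_process(test_s[1])
print(restored)  # approximately [108., 109.]

Note the design consequence visible in the code: since the scaler is refitted for every sequence, inverse_process only round-trips values for the last channel processed.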
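
Persistence is a plain pickle round trip. Continuing the sketch above (the use of tempfile is illustrative):

import tempfile

with tempfile.TemporaryDirectory() as save_dir:
    processor.save(save_dir)  # writes <save_dir>/processor.pkl

    loaded = StandardScaler()
    loaded.load(save_dir)
    # The restored scaler inverts transforms identically to the original.
    assert np.allclose(
        loaded.inverse_process(test_s[1]),
        processor.inverse_process(test_s[1]),
    )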