Source code for ltsm.data_provider.data_loader

import os
import warnings
from pathlib import Path
from typing import Dict

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils.data import ConcatDataset, DataLoader, Dataset

from ltsm.utils.timefeatures import time_features
from ltsm.utils.tools import convert_tsf_to_dataframe

warnings.filterwarnings('ignore')

class HF_Dataset(Dataset):
    """
    Custom dataset class that wraps a PyTorch ``Dataset`` so that each sample
    is exposed as a dictionary of input and output sequences.

    Attributes:
        dataset (Dataset): The underlying Dataset object.
    """

    def __init__(self, dataset):
        """
        Initializes the HF_Dataset with the underlying dataset.

        Args:
            dataset (Dataset): The underlying Dataset object.
        """
        super().__init__()
        self.dataset = dataset

    def __read_data__(self):
        """Delegates data loading to the underlying dataset."""
        return self.dataset.__read_data__()

    def __len__(self) -> int:
        """
        Returns the total number of samples in the dataset.

        Returns:
            int: The number of samples in the dataset.
        """
        return self.dataset.__len__()

    def inverse_transform(self, data):
        return self.dataset.inverse_transform(data)

    def add_data(self, df):
        return self.dataset.add_data(df)

    def __getitem__(self, index: int) -> Dict[str, torch.Tensor]:
        """
        Retrieves a single input sequence and label from the dataset.

        Args:
            index (int): Index of the data point.

        Returns:
            Dict[str, torch.Tensor]: Dictionary containing the input data and
            labels as torch Tensor objects.
        """
        outputs = self.dataset[index]
        seq_x, seq_y = outputs[0], outputs[1]
        return {
            "input_data": seq_x,
            "labels": seq_y
        }

class HF_Timestamp_Dataset(Dataset):
    """
    Wrapper like ``HF_Dataset`` that also passes through the timestamp
    features of the underlying dataset.
    """

    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset

    def __read_data__(self):
        return self.dataset.__read_data__()

    def __len__(self):
        return self.dataset.__len__()

    def inverse_transform(self, data):
        return self.dataset.inverse_transform(data)

    def add_data(self, df):
        return self.dataset.add_data(df)

    def __getitem__(self, index):
        seq_x, seq_y, seq_x_mark, seq_y_mark = self.dataset[index]
        return {
            "input_data": seq_x,
            "labels": seq_y,
            "timestamp_input": seq_x_mark,
            "timestamp_labels": seq_y_mark
        }

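# Usage sketch (illustrative only; the CSV path and window sizes below are
# assumptions, not part of this module): wrap an underlying dataset so each
# item becomes a dict, the format Hugging Face-style training loops expect.
def _example_wrap_dataset():
    base = Dataset_ETT_hour(data_path="./data/ETTh1.csv", split="train", size=(336, 96))
    wrapped = HF_Dataset(base)              # items: {"input_data", "labels"}
    with_time = HF_Timestamp_Dataset(base)  # also carries the timestamp features
    return wrapped[0], with_time[0]
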
class Dataset_ETT_hour(Dataset):
    def __init__(
        self,
        data_path,
        split='train',
        size=None,
        features='S',
        target='OT',
        scale=True,
        timeenc=0,
        freq='h',
        percent=100,
        max_len=-1,
        train_all=False,
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.percent = percent
        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        print("self.enc_in = {}".format(self.enc_in))
        print("self.data_x = {}".format(self.data_x.shape))
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(self.data_path)

        # ETT-hour uses fixed splits: 12 months train, 4 months val, 4 months test.
        border1s = [0, 12 * 30 * 24 - self.seq_len,
                    12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
        border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24,
                    12 * 30 * 24 + 8 * 30 * 24]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features in ['M', 'MS']:
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        # Indices are flattened over (channel, window): each channel contributes
        # tot_len windows, so index // tot_len picks the channel and
        # index % tot_len picks the window start.
        feat_id = index // self.tot_len  # used only in the univariate branch
        s_begin = index % self.tot_len
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        if self.enc_in > 1:
            seq_x = self.data_x[s_begin:s_end]
            seq_y = self.data_y[r_begin:r_end]
        else:
            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

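# Usage sketch (the CSV path is an assumption): iterate ETT-hour windows with
# a standard PyTorch DataLoader; each sample is
# (seq_x, seq_y, seq_x_mark, seq_y_mark).
def _example_ett_hour_loader():
    dataset = Dataset_ETT_hour(data_path="./data/ETTh1.csv", split="train",
                               size=(336, 96), features="M", timeenc=1, freq="h")
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    seq_x, seq_y, seq_x_mark, seq_y_mark = next(iter(loader))
    return seq_x.shape, seq_y.shape  # (32, seq_len, channels), (32, pred_len, channels)
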
class Dataset_ETT_minute(Dataset):
    def __init__(
        self,
        data_path,
        split='train',
        size=None,
        features='S',
        target='OT',
        scale=True,
        timeenc=0,
        freq='t',
        percent=100,
        max_len=-1,
        train_all=False
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(self.data_path)

        # ETT-minute uses the same fixed splits at 15-minute resolution:
        # 12 months train, 4 months val, 4 months test.
        border1s = [0, 12 * 30 * 24 * 4 - self.seq_len,
                    12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
        border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4,
                    12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features in ['M', 'MS']:
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute)
            df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        feat_id = index // self.tot_len  # used only in the univariate branch
        s_begin = index % self.tot_len
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        if self.enc_in > 1:
            seq_x = self.data_x[s_begin:s_end]
            seq_y = self.data_y[r_begin:r_end]
        else:
            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

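# Sketch of undoing the normalization (path assumed): outputs produced in
# scaled space can be mapped back to the original units with the scaler that
# was fit on the training rows.
def _example_inverse_transform():
    dataset = Dataset_ETT_minute(data_path="./data/ETTm1.csv", split="test", size=(336, 96))
    _, seq_y, _, _ = dataset[0]
    return dataset.inverse_transform(seq_y)  # pred_len rows in raw units
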
class Dataset_Custom(Dataset):
    def __init__(
        self,
        data_path,
        split='train',
        size=None,
        features='S',
        target='OT',
        scale=True,
        timeenc=0,
        freq='h',
        percent=10,
        max_len=-1,
        train_all=False
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.data_path = data_path
        self.__read_data__()

        self.enc_in = self.data_x.shape[-1]
        self.tot_len = len(self.data_x) - self.seq_len - self.pred_len + 1

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(self.data_path)

        # Expected column layout: ['date', ...(other features), target feature].
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        # Row-wise split: 70% train, 20% test, remainder validation.
        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.set_type == 0:
            border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

        if self.features in ['M', 'MS']:
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        df_stamp = df_raw[['date']][border1:border2]
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        feat_id = index // self.tot_len  # used only in the univariate branch
        s_begin = index % self.tot_len
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        if self.enc_in > 1:
            seq_x = self.data_x[s_begin:s_end]
            seq_y = self.data_y[r_begin:r_end]
        else:
            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id + 1]
            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id + 1]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return (len(self.data_x) - self.seq_len - self.pred_len + 1) * self.enc_in

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

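# Usage sketch for an arbitrary CSV (file name and target column are
# assumptions): Dataset_Custom reorders columns to ['date', ..., target] and
# splits the rows 70% train / 10% val / 20% test.
def _example_custom_csv():
    train_set = Dataset_Custom(data_path="./data/my_series.csv", split="train",
                               size=(336, 96), features="S", target="OT", percent=100)
    val_set = Dataset_Custom(data_path="./data/my_series.csv", split="val",
                             size=(336, 96), features="S", target="OT")
    return len(train_set), len(val_set)
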
class Dataset_Pred(Dataset):
    def __init__(
        self,
        data_path,
        split='pred',
        size=None,
        features='S',
        target='OT',
        scale=True,
        inverse=False,
        timeenc=0,
        freq='15min',
        cols=None,
        percent=None,
        train_all=False,
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(self.data_path)

        # Expected column layout: ['date', ...(other features), target feature].
        if self.cols:
            cols = self.cols.copy()
            cols.remove(self.target)
        else:
            cols = list(df_raw.columns)
            cols.remove(self.target)
            cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        # Keep only the last seq_len rows; the forecast horizon lies beyond
        # the end of the data.
        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features in ['M', 'MS']:
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        if self.scale:
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # Extend the timestamps pred_len steps into the future.
        tmp_stamp = df_raw[['date']][border1:border2]
        tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date)
        pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq)

        df_stamp = pd.DataFrame(columns=['date'])
        df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:])
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp.date.apply(lambda row: row.month)
            df_stamp['day'] = df_stamp.date.apply(lambda row: row.day)
            df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday())
            df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour)
            df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute)
            df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        if self.inverse:
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        # No ground truth exists past the end of the series, so these slices
        # are empty in prediction mode; seq_y_mark still carries the future
        # timestamp features.
        if self.inverse:
            seq_y = self.data_x[r_begin:r_end]
        else:
            seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return len(self.data_x) - self.seq_len + 1

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

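# Usage sketch (path assumed): Dataset_Pred keeps only the last seq_len rows
# and extends the timestamp features pred_len steps into the future, so a
# model can be run on data with no ground-truth horizon yet.
def _example_pred():
    dataset = Dataset_Pred(data_path="./data/my_series.csv", size=(336, 96),
                           features="S", target="OT", freq="h")
    seq_x, seq_y, seq_x_mark, seq_y_mark = dataset[0]
    # seq_y is empty; seq_y_mark covers the pred_len future timestamps.
    return seq_x.shape, seq_y_mark.shape
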
class Dataset_TSF(Dataset):
    def __init__(
        self,
        data_path,
        split='train',
        size=None,
        features='S',
        target='OT',
        scale=True,
        timeenc=0,
        freq='Daily',
        percent=10,
        max_len=-1,
        train_all=False,
    ):
        self.train_all = train_all
        # size: [seq_len, label_len, pred_len]; only seq_len and pred_len are used.
        self.seq_len = size[0]
        self.pred_len = size[2]

        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]
        self.percent = percent

        self.max_len = max_len
        if self.max_len == -1:
            self.max_len = 1e8

        self.data_path = data_path
        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.__read_data__()
        self.tot_len = self.len_index[-1]

    def __read_data__(self):
        self.scaler = StandardScaler()
        df, frequency, forecast_horizon, contain_missing_values, contain_equal_length = \
            convert_tsf_to_dataframe(self.data_path)
        self.freq = frequency

        def dropna(x):
            return x[~np.isnan(x)]

        timeseries = [dropna(ts).astype(np.float32) for ts in df.series_value]

        self.data_all = []
        self.len_index = [0]
        self.tot_len = 0
        for i in range(len(timeseries)):
            df_raw = timeseries[i].reshape(-1, 1)

            # Row-wise split per series: 70% train, 20% test, remainder validation.
            num_train = int(len(df_raw) * 0.7)
            num_test = int(len(df_raw) * 0.2)
            num_vali = len(df_raw) - num_train - num_test
            border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
            border2s = [num_train, num_train + num_vali, len(df_raw)]
            border1 = border1s[self.set_type]
            border2 = border2s[self.set_type]

            if self.set_type == 0:
                border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

            # Note: the scaler is refit for every series, so it keeps the
            # statistics of the last series processed.
            if self.scale:
                train_data = df_raw[border1s[0]:border2s[0]]
                self.scaler.fit(train_data)
                data = self.scaler.transform(df_raw)
            else:
                data = df_raw

            self.data_all.append(data[border1:border2])
            # len_index[i] is the first global window index of series i.
            self.len_index.append(self.len_index[-1] + border2 - border1 - self.seq_len - self.pred_len + 1)

    def __getitem__(self, index):
        # Locate the series that owns this global index.
        i = 0
        for i in range(len(self.len_index)):
            if index < self.len_index[i]:
                i -= 1
                break

        s_begin = index - self.len_index[i]
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_all[i][s_begin:s_end]
        seq_y = self.data_all[i][r_begin:r_end]

        # No timestamp features are available for .tsf data.
        return seq_x, seq_y, np.empty(shape=(self.seq_len, 0)), np.empty(shape=(self.pred_len, 0))

    def __len__(self):
        return self.tot_len

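# Usage sketch (the .tsf path is an assumption): Dataset_TSF flattens many
# variable-length series from a Monash-style .tsf file into one index space;
# len_index[i] is the first global window index belonging to series i.
def _example_tsf():
    dataset = Dataset_TSF(data_path="./data/sample.tsf", split="train",
                          size=(336, 0, 96), percent=100)
    seq_x, seq_y, _, _ = dataset[0]
    return len(dataset), seq_x.shape, seq_y.shape
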
class Dataset_Custom_List(Dataset):
    def __init__(
        self,
        data_path=[],
        split='train',
        size=None,
        features='M',
        target='OT',
        scale=True,
        timeenc=0,
        freq='h',
        percent=10,
        max_len=-1,
        train_all=False
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.data_path = data_path
        self.__read_data__()
        self.tot_len = self.len_index[-1]

    def __read_data__(self):
        self.scaler = StandardScaler()

        self.data_all = []
        self.len_index = [0]
        self.tot_len = 0
        for path in self.data_path:
            if path.endswith('.csv'):
                df_raw = pd.read_csv(path)
            elif path.endswith('.feather'):
                df_raw = pd.read_feather(path)
            else:
                raise ValueError("Unsupported file type: {}".format(path))

            df_raw = df_raw.dropna()
            df_raw = df_raw.values

            # Note: fit_transform refits the scaler on every file, so
            # inverse_transform reflects the statistics of the last file read.
            if self.scale:
                df_raw = self.scaler.fit_transform(df_raw)

            self.data_all.append(df_raw)
            # len_index[i] is the first global window index of file i.
            self.len_index.append(self.len_index[-1] + len(df_raw) - self.seq_len - self.pred_len + 1)

    def __getitem__(self, index):
        # Locate the file that owns this global index.
        i = 0
        for i in range(len(self.len_index)):
            if index < self.len_index[i]:
                i -= 1
                break

        s_begin = index - self.len_index[i]
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_all[i][s_begin:s_end]
        seq_y = self.data_all[i][r_begin:r_end]

        return seq_x, seq_y, np.empty(shape=(self.seq_len, 0)), np.empty(shape=(self.pred_len, 0))

    def __len__(self):
        return self.tot_len

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

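# Usage sketch (file names assumed): Dataset_Custom_List concatenates whole
# files without a train/val/test split; because the scaler is refit per file,
# inverse_transform uses the statistics of the last file read.
def _example_file_list():
    dataset = Dataset_Custom_List(data_path=["./data/a.csv", "./data/b.feather"],
                                  size=(336, 96))
    seq_x, seq_y, _, _ = dataset[len(dataset) - 1]
    return seq_x.shape, seq_y.shape
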
class Dataset_Custom_List_TS(Dataset):
    def __init__(
        self,
        data_path=[],
        split='train',
        size=None,
        features='M',
        target='OT',
        scale=True,
        timeenc=0,
        freq='h',
        percent=10,
        max_len=-1,
        train_all=False
    ):
        # size: [seq_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.data_path = data_path
        self.__read_data__()
        self.tot_len = self.len_index[-1]

    def __read_data__(self):
        self.scaler = StandardScaler()

        self.data_all = []
        self.len_index = [0]
        self.tot_len = 0
        for path in self.data_path:
            if path.endswith('.csv'):
                df_raw = pd.read_csv(path)
            elif path.endswith('.feather'):
                df_raw = pd.read_feather(path)
            else:
                raise ValueError("Unsupported file type: {}".format(path))
            # df_raw = df_raw.dropna()
            # df_raw = df_raw.values

            # Row-wise split per file: 70% train, 20% test, remainder validation.
            num_train = int(len(df_raw) * 0.7)
            num_test = int(len(df_raw) * 0.2)
            num_vali = len(df_raw) - num_train - num_test
            border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
            border2s = [num_train, num_train + num_vali, len(df_raw)]
            border1 = border1s[self.set_type]
            border2 = border2s[self.set_type]

            if self.set_type == 0:
                border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

            # Note: the scaler is refit on each file's training rows, so it
            # keeps the statistics of the last file read.
            if self.scale:
                train_data = df_raw[border1s[0]:border2s[0]]
                self.scaler.fit(train_data.values)
                data = self.scaler.transform(df_raw.values)
            else:
                data = df_raw.values

            self.data_all.append(data[border1:border2])
            # len_index[i] is the first global window index of file i.
            self.len_index.append(self.len_index[-1] + border2 - border1 - self.seq_len - self.pred_len + 1)

    def add_data(self, df):
        # Append an additional (already scaled) series at runtime; it must be
        # long enough to contain at least one full window.
        assert len(df) >= self.seq_len + self.pred_len
        self.data_all.append(df)
        self.len_index.append(self.len_index[-1] + len(df) - self.seq_len - self.pred_len + 1)
        self.tot_len = self.len_index[-1]

    def __getitem__(self, index):
        # Locate the series that owns this global index.
        i = 0
        for i in range(len(self.len_index)):
            if index < self.len_index[i]:
                i -= 1
                break

        s_begin = index - self.len_index[i]
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_all[i][s_begin:s_end]
        seq_y = self.data_all[i][r_begin:r_end]

        return seq_x, seq_y, np.empty(shape=(self.seq_len, 0)), np.empty(shape=(self.pred_len, 0))

    def __len__(self):
        return self.tot_len

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

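# Usage sketch (shapes assumed): add_data appends a new, already-scaled series
# at runtime, e.g. for streaming evaluation; it must contain at least one full
# (seq_len + pred_len) window.
def _example_add_data():
    dataset = Dataset_Custom_List_TS(data_path=["./data/a.csv"], split="train",
                                     size=(336, 96))
    new_series = np.random.randn(336 + 96, dataset.data_all[0].shape[-1])
    dataset.add_data(new_series)
    return len(dataset)
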
class Dataset_Custom_List_TS_TSF(Dataset):
    def __init__(
        self,
        data_path=[],
        split='train',
        size=None,
        features='M',
        target='OT',
        scale=True,
        timeenc=0,
        freq='h',
        percent=10,
        max_len=-1,
        train_all=False
    ):
        # size: [seq_len, label_len, pred_len]; label_len is ignored.
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, _, self.pred_len = size

        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.data_path = data_path
        self.__read_data__()
        self.tot_len = self.len_index[-1]

    def __read_data__(self):
        self.scaler = StandardScaler()

        def dropna(x):
            return x[~np.isnan(x)]

        self.data_all = []
        self.len_index = [0]
        self.tot_len = 0
        for path in self.data_path:
            df, frequency, forecast_horizon, contain_missing_values, contain_equal_length = \
                convert_tsf_to_dataframe(path)
            self.freq = frequency
            timeseries = [dropna(ts).astype(np.float32) for ts in df.series_value]

            for timeserie in timeseries:
                df_raw = timeserie.reshape(-1, 1)

                # Row-wise split per series: 70% train, 20% test, remainder validation.
                num_train = int(len(df_raw) * 0.7)
                num_test = int(len(df_raw) * 0.2)
                num_vali = len(df_raw) - num_train - num_test
                border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
                border2s = [num_train, num_train + num_vali, len(df_raw)]
                border1 = border1s[self.set_type]
                border2 = border2s[self.set_type]

                if self.set_type == 0:
                    border2 = (border2 - self.seq_len) * self.percent // 100 + self.seq_len

                # Note: the scaler is refit for every series, so it keeps the
                # statistics of the last series processed.
                if self.scale:
                    train_data = df_raw[border1s[0]:border2s[0]]
                    self.scaler.fit(train_data)
                    data = self.scaler.transform(df_raw)
                else:
                    data = df_raw

                self.data_all.append(data[border1:border2])
                # len_index[i] is the first global window index of series i.
                self.len_index.append(self.len_index[-1] + border2 - border1 - self.seq_len - self.pred_len + 1)

    def add_data(self, df):
        # Append an additional (already scaled) series at runtime; it must be
        # long enough to contain at least one full window.
        assert len(df) >= self.seq_len + self.pred_len
        self.data_all.append(df)
        self.len_index.append(self.len_index[-1] + len(df) - self.seq_len - self.pred_len + 1)
        self.tot_len = self.len_index[-1]

    def __getitem__(self, index):
        # Locate the series that owns this global index.
        i = 0
        for i in range(len(self.len_index)):
            if index < self.len_index[i]:
                i -= 1
                break

        s_begin = index - self.len_index[i]
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        seq_x = self.data_all[i][s_begin:s_end]
        seq_y = self.data_all[i][r_begin:r_end]

        return seq_x, seq_y, np.empty(shape=(self.seq_len, 0)), np.empty(shape=(self.pred_len, 0))

    def __len__(self):
        return self.tot_len

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

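# Usage sketch (.tsf paths assumed): the TSF list variant takes size as
# (seq_len, label_len, pred_len) and splits every series 70/10/20 before
# concatenating the resulting windows.
def _example_tsf_list():
    dataset = Dataset_Custom_List_TS_TSF(data_path=["./data/a.tsf", "./data/b.tsf"],
                                         split="val", size=(336, 0, 96))
    seq_x, seq_y, _, _ = dataset[0]
    return seq_x.shape, seq_y.shape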