DataBlock API to construct the DataLoaders
We will create a DataBlock to process our UCR datasets
# Fetch the UCR archive and load the train/test frames of one dataset.
ucr_path = untar_data(URLs.UCR)
df_train, df_test = load_df_ucr(ucr_path, 'StarLightCurves')
df_train.head()
# Every column except the last one is used as an input feature name.
x_cols = df_train.columns[:-1].to_list()
x_cols[:5]
# Wrap the training frame in both container types and peek at the first rows.
tts = TabularTS(df_train, x_names=x_cols, y_names='target')
tts.iloc[:4]
to = TSPandas(df_train, x_names=x_cols, y_names='target')
to.iloc[:5]
# Sanity-check Normalize on a single column: the statistics it stores and the
# values it produces must match a manual (x - mean) / std computation.
norm = Normalize()
first_col = x_cols[0]
df = df_train.loc[:, [first_col]]
to = TSPandas(df, norm, x_names=first_col)
# Reference statistics computed directly from the raw column values.
x = df.values.squeeze()
m = x.mean()
s = x.std()
test_eq(norm.means[first_col], m)
test_close(norm.stds[first_col], s)
test_close(to[first_col].values, (x - m) / s)
# Rebuild the container over the full training frame, passing None in the
# second positional slot (the slot where `norm` is passed elsewhere in this
# file), then display the resulting `procs` attribute.
to = TSPandas(df_train, None, x_names=x_cols, y_names='target')
to.procs
Let's check that encoding a batch by hand gives the expected `x` and `y` shapes:
# Encode the first 16 rows by hand and inspect the shapes of the result.
rtsb = ReadTSBatch(to)
first_rows = to.iloc[:16]
x, y = rtsb.encodes(first_rows)
x.shape, y.shape
This function needs to be redone
# Split indices must be drawn over the same frame they are applied to: `to`
# wraps `df_test`, so the splitter ranges over `df_test`. Indices drawn over
# `df_train` are not guaranteed to be valid (or complete) row positions of
# `df_test`.
splits = RandomSplitter()(range_of(df_test))
to = TSPandas(df_test, norm, x_names=x_cols, y_names='target', splits=splits)
# Dataloader built from the container above; it is timed further down.
test_dl = TabularTSDataloader(to)
Quick helper to measure how long one full pass over the DataLoader (DL) takes:
def cycle_dl(dl):
    """Consume every batch of `dl` once and discard it.

    Intended for timing a data loader in isolation: the only work performed
    is producing the batches.

    Args:
        dl: Any iterable of batches (for example a DataLoader).

    Returns:
        None.
    """
    # Batches are deliberately not unpacked, so loaders that yield something
    # other than an ``(x, y)`` pair can be cycled as well.
    for _batch in dl:
        pass
# IPython `%time` line magic: reports how long one full pass over `test_dl` takes.
%time cycle_dl(test_dl)
from timeseries_fastai.models import create_inception
# Small end-to-end smoke test: keep only the first 128 stacked rows, use rows
# 0-95 for training and rows 96-127 for validation, then fit for one epoch.
df_main = stack_train_valid(df_train, df_test).iloc[:128]
splits = [list(range(96)), list(range(96, 128))]
to = TSPandas(
    df_main,
    norm,
    x_names=x_cols,
    y_names='target',
    splits=splits,
)
# NOTE(review): presumably (batch size, validation batch size) - confirm
# against the installed `dataloaders` signature.
dls = to.dataloaders(32, 128)
# NOTE(review): presumably (input channels, number of classes) - confirm
# against `create_inception`.
inception = create_inception(1, len(dls.vocab))
learn = Learner(dls, inception, metrics=[accuracy])
learn.fit_one_cycle(1)