We will create a DataBlock to process our UCR datasets

# Fetch the UCR archive with fastai's `untar_data`; the result is bound to `ucr_path`.
ucr_path = untar_data(URLs.UCR)

# Load the train and test frames of the 'StarLightCurves' dataset from that path.
df_train, df_test = load_df_ucr(ucr_path, 'StarLightCurves')

Loading files from: /home/tcapelle/.fastai/data/Univariate2018_arff/StarLightCurves

df_train.head()

# Feature columns are every column except the last one (the last column is 'target').
x_cols = df_train.columns[slice(0,-1)].to_list()
# Peek at the first five feature names.
x_cols[0:5]

['att1', 'att2', 'att3', 'att4', 'att5']

tts = TabularTS(df_train, x_names=x_cols, y_names='target')

tts.iloc[0:4]

       att1      att2      att3      att4      att5      att6      att7  \
0  0.537303  0.531103  0.528503  0.529403  0.533603  0.540903  0.551103   
1  0.588398  0.593898  0.599098  0.604098  0.608798  0.613397  0.617797   
2 -0.049900 -0.041500 -0.033400 -0.025600 -0.018100 -0.010800 -0.003800   
3  1.337005  1.319805  1.302905  1.286305  1.270005  1.254005  1.238304   

       att8      att9     att10  ...   att1016   att1017   att1018   att1019  \
0  0.564003  0.579603  0.597603  ...  0.546903  0.545903  0.543903  0.541003   
1  0.622097  0.626097  0.630097  ...  0.237399  0.246499  0.256199  0.266499   
2  0.003000  0.009600  0.015900  ... -0.173801 -0.161601 -0.149201 -0.136401   
3  1.223005  1.208104  1.193504  ...  1.288905  1.298505  1.307705  1.316505   

    att1020   att1021   att1022   att1023   att1024  target  
0  0.537203  0.532303  0.526403  0.519503  0.511403    b'3'  
1  0.277399  0.288799  0.300899  0.313599  0.326899    b'3'  
2 -0.123201 -0.109701 -0.095901 -0.081701 -0.067100    b'1'  
3  1.324905  1.332805  1.340205  1.347005  1.353205    b'3'  

[4 rows x 1025 columns]

to = TSPandas(df_train, x_names=x_cols, y_names='target')

to.iloc[0:5]

       att1      att2      att3      att4      att5      att6      att7  \
0  0.537303  0.531103  0.528503  0.529403  0.533603  0.540903  0.551103   
1  0.588398  0.593898  0.599098  0.604098  0.608798  0.613397  0.617797   
2 -0.049900 -0.041500 -0.033400 -0.025600 -0.018100 -0.010800 -0.003800   
3  1.337005  1.319805  1.302905  1.286305  1.270005  1.254005  1.238304   
4  0.769801  0.775301  0.780401  0.785101  0.789401  0.793301  0.796801   

       att8      att9     att10  ...   att1016   att1017   att1018   att1019  \
0  0.564003  0.579603  0.597603  ...  0.546903  0.545903  0.543903  0.541003   
1  0.622097  0.626097  0.630097  ...  0.237399  0.246499  0.256199  0.266499   
2  0.003000  0.009600  0.015900  ... -0.173801 -0.161601 -0.149201 -0.136401   
3  1.223005  1.208104  1.193504  ...  1.288905  1.298505  1.307705  1.316505   
4  0.799901  0.802601  0.805101  ...  0.742401  0.744501  0.747301  0.750701   

    att1020   att1021   att1022   att1023   att1024  target  
0  0.537203  0.532303  0.526403  0.519503  0.511403    b'3'  
1  0.277399  0.288799  0.300899  0.313599  0.326899    b'3'  
2 -0.123201 -0.109701 -0.095901 -0.081701 -0.067100    b'1'  
3  1.324905  1.332805  1.340205  1.347005  1.353205    b'3'  
4  0.754801  0.759501  0.765001  0.771301  0.778401    b'3'  

[5 rows x 1025 columns]

# Sanity check of the normalisation proc on a single feature column.
norm = Normalize()
# Keep the selection as a one-column DataFrame (note the list inside `.loc`).
df = df_train.loc[:, [x_cols[0]]]
to = TSPandas(df, norm, x_names=x_cols[0])
# Reference statistics computed directly on the raw values.
x = df.values.squeeze()
m,s = x.mean(),x.std()
# The proc must have stored the same mean/std for that column ...
test_eq(norm.means[x_cols[0]], m)
test_close(norm.stds[x_cols[0]], s)
# ... and the values held by `to` must be the standardised column (x - mean) / std.
test_close(to[x_cols[0]].values, (x-m)/s)

to = TSPandas(df_train, None, x_names=x_cols, y_names='target')

to.procs

Pipeline: Categorize -- {'vocab': None, 'sort': True, 'add_na': False}

Let's check that we get the expected encoded batch:

# Wrap the tabular object in the batch reader.
rtsb = ReadTSBatch(to)

# Encode the first 16 rows into an (x, y) pair.
x,y = rtsb.encodes(to.iloc[0:16])

# Show the shapes of the encoded inputs and targets.
x.shape, y.shape

(torch.Size([16, 1, 1024]), torch.Size([16, 1]))

This function needs to be redone

# The split indices must index the same frame that `TSPandas` wraps, so they
# are drawn from `range_of(df_test)` rather than from a different frame.
splits = RandomSplitter()(range_of(df_test))
to = TSPandas(df_test, norm, x_names=x_cols, y_names='target', splits=splits)

# Dataloader over the tabular time-series object built above.
test_dl = TabularTSDataloader(to)

A quick helper that iterates once over a whole DataLoader, so we can time a full pass:

def cycle_dl(dl):
    "Consume every `(x, y)` batch of `dl` exactly once, discarding it; returns `None`."
    batches = iter(dl)
    for _inputs, _targets in batches:
        continue

%time cycle_dl(test_dl)

CPU times: user 80.2 ms, sys: 11 µs, total: 80.2 ms
Wall time: 80 ms

Integration Example

from timeseries_fastai.models import create_inception

# Stack the train and test frames and keep only the first 128 rows
# (presumably to keep the example small and fast).
df_main = stack_train_valid(df_train, df_test).iloc[0:128]

# Explicit, deterministic split: rows 0-95 in the first subset, rows 96-127 in the second
# (fastai convention: first = training, second = validation).
splits=[list(range(96)), list(range(96, 128))]
to = TSPandas(df_main, norm, x_names=x_cols, y_names='target', splits=splits)

# NOTE(review): presumably batch size 32 for training and 128 for validation -- confirm against `dataloaders`.
dls = to.dataloaders(32, 128)
# NOTE(review): presumably 1 input channel and one output per class in `dls.vocab` -- confirm against `create_inception`.
inception = create_inception(1, len(dls.vocab))
learn = Learner(dls, inception, metrics=[accuracy])
# Train for a single one-cycle epoch.
learn.fit_one_cycle(1)

Tabular inspired Data

`class` `TabularTS`[source]

`class` `TSPandas`[source]

`setups`[source]

`encodes`[source]

`decodes`[source]

`class` `NormalizeTS`[source]

`setups`[source]

`encodes`[source]

`decodes`[source]

`class` `ReadTSBatch`[source]

`class` `TabularTSDataloader`[source]

`stack_train_valid`[source]

Integration Example

	att1	att2	att3	att4	att5	att6	att7	att8	att9	att10	...	att1016	att1017	att1018	att1019	att1020	att1021	att1022	att1023	att1024	target
0	0.537303	0.531103	0.528503	0.529403	0.533603	0.540903	0.551103	0.564003	0.579603	0.597603	...	0.546903	0.545903	0.543903	0.541003	0.537203	0.532303	0.526403	0.519503	0.511403	b'3'
1	0.588398	0.593898	0.599098	0.604098	0.608798	0.613397	0.617797	0.622097	0.626097	0.630097	...	0.237399	0.246499	0.256199	0.266499	0.277399	0.288799	0.300899	0.313599	0.326899	b'3'
2	-0.049900	-0.041500	-0.033400	-0.025600	-0.018100	-0.010800	-0.003800	0.003000	0.009600	0.015900	...	-0.173801	-0.161601	-0.149201	-0.136401	-0.123201	-0.109701	-0.095901	-0.081701	-0.067100	b'1'
3	1.337005	1.319805	1.302905	1.286305	1.270005	1.254005	1.238304	1.223005	1.208104	1.193504	...	1.288905	1.298505	1.307705	1.316505	1.324905	1.332805	1.340205	1.347005	1.353205	b'3'
4	0.769801	0.775301	0.780401	0.785101	0.789401	0.793301	0.796801	0.799901	0.802601	0.805101	...	0.742401	0.744501	0.747301	0.750701	0.754801	0.759501	0.765001	0.771301	0.778401	b'3'

Tabular inspired Data

class TabularTS[source]

class TSPandas[source]

setups[source]

encodes[source]

decodes[source]

class NormalizeTS[source]

setups[source]

encodes[source]

decodes[source]

class ReadTSBatch[source]

class TabularTSDataloader[source]

stack_train_valid[source]

Integration Example

`class` `TabularTS`[source]

`class` `TSPandas`[source]

`setups`[source]

`encodes`[source]

`decodes`[source]

`class` `NormalizeTS`[source]

`setups`[source]

`encodes`[source]

`decodes`[source]

`class` `ReadTSBatch`[source]

`class` `TabularTSDataloader`[source]

`stack_train_valid`[source]