Input -> (imgs: torch.Size([1, 6, 3, 64, 64]), feats: torch.Size([1, 5, 6]))
augmented imgs: torch.Size([1, 6, 8, 64, 64])
Encode Timestep:(i=0)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=1)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=2)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=3)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=4)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=5)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=6)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=7)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=8)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
Encode Timestep:(i=9)
input shape: torch.Size([1, 6, 8, 64, 64])
CondTime->x.shape: torch.Size([1, 6, 18, 64, 64])
encoded images shape: torch.Size([1, 6, 256, 16, 16])
temp_enc out shape: torch.Size([1, 128, 16, 16])
res.shape=torch.Size([10])