Commit c64fb7de authored by Paul Primus

add conv-autoencoder architecture

parent 950312f7
@@ -98,9 +98,9 @@ def configuration():
hop_size = 512
power = 2.0
fmin = 0
context = 5
context = 8
model_class = 'dcase2020_task2.models.MADE'
model_class = 'dcase2020_task2.models.ConvAE' # 'dcase2020_task2.models.MADE'
hidden_size = 1024
num_hidden = 4
latent_size = 8 # only used for AEs
@@ -113,8 +113,8 @@ def configuration():
epochs = 100
num_workers = 1
reconstruction_class = 'dcase2020_task2.losses.NLLReconstruction'
batch_size = 512
reconstruction_class = 'dcase2020_task2.losses.MSEReconstruction' # 'dcase2020_task2.losses.NLLReconstruction'
batch_size = 256
learning_rate = 1e-3
weight_decay = 0
......
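The configuration selects the model and the reconstruction loss by dotted class path, so switching from MADE to the new ConvAE amounts to swapping two strings (alongside the larger context window and the halved batch size). A minimal sketch of how such a path could be resolved and the model instantiated; the load_class helper, the constructor call, and the (1, 128, 16) input shape (taken from the commented-out smoke test further down) are assumptions, not the repository's actual factory code:

import importlib

def load_class(dotted_path):
    # hypothetical helper: split 'package.module.Class' into module path and attribute name
    module_name, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), class_name)

# assumed wiring, mirroring the configuration values above
reconstruction_cls = load_class('dcase2020_task2.losses.MSEReconstruction')
model_cls = load_class('dcase2020_task2.models.ConvAE')
reconstruction = reconstruction_cls((1, 128, 16))
model = model_cls((1, 128, 16), reconstruction)   # ConvAE defaults: hidden_size=128, num_hidden=3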
@@ -26,21 +26,15 @@ class NLLReconstruction(BaseReconstruction):
# prepare observations and prediction based on loss type:
# use linear outputs & normalized observations as is
# MAF eq 4 -- return mean and log std
batch['m'], batch['loga'] = batch['pre_reconstructions'].chunk(chunks=2, dim=1)
# these values should be normally distributed...
batch['u'] = (batch['observations'].view(len(batch['observations']), -1) - batch['m']) * torch.exp(-batch['loga'])
# MAF eq 5
batch['log_abs_det_jacobian'] = -batch['loga']
# log probability
batch['log_proba'] = torch.sum(self.base_dist.log_prob(batch['u']) + batch['log_abs_det_jacobian'], dim=1)
# scores
batch['scores'] = -batch['log_proba']
batch['scores'] = - batch['log_proba']
batch['visualizations'] = batch['u'].view(-1, *self.input_shape)
batch['visualizations'] = batch['reconstruction']
# loss
batch['reconstruction_loss_raw'] = - batch['log_proba'].mean()
......
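Since the configuration above now points at dcase2020_task2.losses.MSEReconstruction, here is an illustrative sketch of what an MSE counterpart to the same batch-dict interface could look like. This is not the repository's actual MSEReconstruction; the import location of BaseReconstruction, the constructor signature, and the choice of per-example mean squared error as the anomaly score are assumptions:

from dcase2020_task2.losses import BaseReconstruction  # assumed location of the base class

class MSEReconstructionSketch(BaseReconstruction):

    def __init__(self, input_shape):
        super().__init__()
        self.input_shape = input_shape

    def forward(self, batch):
        observations = batch['observations'].view(len(batch['observations']), -1)
        reconstructions = batch['pre_reconstructions'].view(len(observations), -1)
        errors = (observations - reconstructions).pow(2)
        # per-example mean squared error doubles as the anomaly score
        batch['scores'] = errors.mean(dim=1)
        batch['reconstruction'] = batch['pre_reconstructions'].view(-1, *self.input_shape)
        batch['visualizations'] = batch['reconstruction']
        batch['reconstruction_loss_raw'] = errors.mean()
        return batch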
from dcase2020_task2.models.base_model import ClassifierBase, VAEBase
from dcase2020_task2.models.made import MADE
from dcase2020_task2.models.ae import AE
\ No newline at end of file
from dcase2020_task2.models.ae import AE, ConvAE
\ No newline at end of file
import torch.nn
from dcase2020_task2.models import VAEBase#
from dcase2020_task2.models import VAEBase #
from dcase2020_task2.priors import NoPrior
import numpy as np
import torch
from dcase2020_task2.models.custom import activation_dict, init_weights
from torchsummary import summary
class AE(torch.nn.Module, VAEBase):
@@ -29,13 +29,14 @@ class AE(torch.nn.Module, VAEBase):
self.reconstruction = reconstruction_loss
# encoder sizes/ layers
sizes = [np.prod(input_shape) ] + [hidden_size] * num_hidden + [prior.input_size]
sizes = [np.prod(input_shape)] + [hidden_size] * num_hidden + [prior.input_size]
encoder_layers = []
for i, o in zip(sizes[:-1], sizes[1:]):
encoder_layers.append(torch.nn.Linear(i, o))
if batch_norm:
encoder_layers.append(torch.nn.BatchNorm1d(o))
encoder_layers.append(activation_fn())
encoder_layers.append(torch.nn.AvgPool2d(kernel_size=2))
# symmetric decoder sizes / layers
sizes = sizes[::-1]
@@ -47,6 +48,7 @@ class AE(torch.nn.Module, VAEBase):
decoder_layers.append(activation_fn())
# remove last relu
_ = decoder_layers.pop()
_ = decoder_layers.pop()
self.encoder = torch.nn.Sequential(*encoder_layers)
self.decoder = torch.nn.Sequential(*decoder_layers)
@@ -68,3 +70,169 @@ class AE(torch.nn.Module, VAEBase):
batch['pre_reconstructions'] = self.decoder(batch['codes']).view(-1, *self.input_shape)
batch = self.reconstruction(batch)
return batch
class ResidualBlock(torch.nn.Module):
def __init__(
self,
n_units,
n_layers=2,
kernel_size=(3, 3),
activation='relu'
):
super().__init__()
modules = []
for _ in range(n_layers):
modules.append(
torch.nn.Conv2d(
n_units,
n_units,
kernel_size=kernel_size,
padding=(kernel_size[0]//2, kernel_size[1]//2)
)
)
modules.append(
torch.nn.BatchNorm2d(
n_units
)
)
modules.append(
activation_dict[activation]()
)
self.last_activation = modules.pop()
self.block = torch.nn.Sequential(
*modules
)
def forward(self, x):
x = self.block(x) + x
return self.last_activation(x)
class ConvAE(torch.nn.Module, VAEBase):
def __init__(
self,
input_shape,
reconstruction_loss,
prior=None,
hidden_size=128,
num_hidden=3,
activation='relu'
):
super().__init__()
activation_fn = activation_dict[activation]
if prior is None:
prior = NoPrior(latent_size=hidden_size)
self.input_shape = input_shape
self.prior = prior
self.reconstruction = reconstruction_loss
self.input = torch.nn.Conv2d(
input_shape[0],
hidden_size,
kernel_size=1
)
self.block1 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.pool1 = torch.nn.MaxPool2d(2, return_indices=True)
self.block2 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.pool2 = torch.nn.MaxPool2d(2, return_indices=True)
self.block3 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.pool3 = torch.nn.MaxPool2d(2, return_indices=True)
pre_hidden_size = hidden_size * input_shape[1]//8 * input_shape[2]//8
self.pre_prior = torch.nn.Sequential(
torch.nn.Linear(pre_hidden_size, self.prior.input_size)
)
self.post_prior = torch.nn.Sequential(
torch.nn.Linear(self.prior.latent_size, pre_hidden_size),
activation_fn()
)
self.block4 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.block5 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.block6 = ResidualBlock(
hidden_size,
n_layers=1,
kernel_size=(3, 3),
activation='relu'
)
self.output = torch.nn.Conv2d(
hidden_size,
input_shape[0],
kernel_size=1,
)
self.apply(init_weights)
def forward(self, batch):
batch = self.encode(batch)
shape = batch['pre_codes'].shape
batch['pre_codes'] = self.pre_prior(batch['pre_codes'].view(shape[0], -1))
batch = self.prior(batch)
batch['post_codes'] = self.post_prior(batch['codes']).view(shape)
batch = self.decode(batch)
return batch
def encode(self, batch):
x = batch['observations']
x = self.input(x)
x, idx1 = self.pool1(self.block1(x))
x, idx2 = self.pool2(self.block2(x))
x, idx3 = self.pool3(self.block3(x))
batch['pre_codes'] = x
batch['pool_indices'] = [idx1, idx2, idx3]
return batch
def decode(self, batch):
x = torch.nn.functional.max_unpool2d(batch['post_codes'], batch['pool_indices'][2], 2)
x = torch.nn.functional.max_unpool2d(self.block4(x), batch['pool_indices'][1], 2)
x = torch.nn.functional.max_unpool2d(self.block5(x), batch['pool_indices'][0], 2)
batch['pre_reconstructions'] = self.output(self.block6(x))
batch = self.reconstruction(batch)
return batch
# from dcase2020_task2.losses import MSEReconstruction
# ae = ConvAE((1, 128, 16), MSEReconstruction((1, 128, 16)))
# batch = {
# 'observations': torch.zeros((512, 1, 128, 16))
# }
# batch = ae(batch)
# print(batch)
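For the shape bookkeeping behind pre_hidden_size: the three MaxPool2d(2) stages halve both spatial dimensions three times, so the mel and time axes must be divisible by 8 and the bottleneck feature map has hidden_size channels over (height/8, width/8) positions. A quick check of that arithmetic for the (1, 128, 16) shape used in the commented-out smoke test above (the shape is taken from that test, not from the training pipeline):

hidden_size = 128                       # ConvAE default
input_shape = (1, 128, 16)              # (channels, mel bins, context frames) as in the smoke test
pre_hidden_size = hidden_size * input_shape[1] // 8 * input_shape[2] // 8
assert pre_hidden_size == 128 * 16 * 2  # 4096 units feed the pre_prior linear layer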
@@ -19,28 +19,29 @@ def init_weights(m):
# Model layers and helpers
# --------------------
def create_masks(input_size, hidden_size, n_hidden, input_order='sequential', input_degrees=None):
def create_masks(input_size, hidden_size, n_hidden, input_order='random', input_degrees=None):
# MADE paper sec 4:
# degrees of connections between layers -- ensure at most in_degree - 1 connections
degrees = []
if input_degrees is not None:
assert all(input_degrees >= 0) and all(input_degrees < input_size)
# set input degrees to what is provided in args (the flipped order of the previous layer in a stack of mades);
# else init input degrees based on strategy in input_order (sequential or random)
if input_order == 'sequential':
degrees += [torch.arange(input_size)] if input_degrees is None else [input_degrees]
for _ in range(n_hidden + 1):
degrees += [torch.arange(hidden_size) % (input_size - 1)]
degrees += [torch.arange(input_size) % input_size - 1] if input_degrees is None else [
input_degrees % input_size - 1]
degrees += [torch.arange(input_size) % input_size - 1] if input_degrees is None else [input_degrees % input_size - 1]
elif input_order == 'random':
degrees += [torch.randperm(input_size)] if input_degrees is None else [input_degrees]
input_degrees = (torch.randperm(input_size) + 1) if input_degrees is None else (input_degrees + 1)
degrees += [input_degrees]
for _ in range(n_hidden + 1):
min_prev_degree = min(degrees[-1].min().item(), input_size - 1)
degrees += [torch.randint(min_prev_degree, input_size, (hidden_size,))]
min_prev_degree = min(degrees[-1].min().item(), input_size - 1)
degrees += [torch.randint(min_prev_degree, input_size, (input_size,)) - 1] if input_degrees is None else [
input_degrees - 1]
degrees += [input_degrees - 1]
# construct masks
masks = []
......
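The hunk is cut off just before the mask construction. In the MADE/MAF reference implementations this code follows, the masks are built by comparing consecutive degree vectors: a unit may only connect to units of lower or equal degree in the previous layer (strictly lower for the outputs, which is why the final degrees are shifted by -1). An illustrative sketch of that step with a tiny worked example; the exact broadcasting in the repository may differ, so treat this as an assumption:

import torch

def masks_from_degrees(degrees):
    # connection allowed where the receiving unit's degree is >= the sending unit's degree
    return [(d_out.unsqueeze(-1) >= d_in.unsqueeze(0)).float()
            for d_in, d_out in zip(degrees[:-1], degrees[1:])]

# worked example: input_size=3, one hidden layer of 4 units, sequential order
degrees = [torch.arange(3),            # input degrees  [0, 1, 2]
           torch.arange(4) % 2,        # hidden degrees [0, 1, 0, 1]
           torch.arange(3) - 1]        # output degrees [-1, 0, 1] -> strict autoregressive ordering
for mask in masks_from_degrees(degrees):
    print(mask)                        # shapes (4, 3) and (3, 4), matching the masked linear weights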
@@ -13,8 +13,8 @@ class MADE(nn.Module):
hidden_size=4096,
num_hidden=4,
activation='relu',
input_order='random',
cond_label_size=None
cond_label_size=None,
**kwargs
):
"""
Args:
@@ -25,14 +25,17 @@ class MADE(nn.Module):
self.input_shape = input_shape
self.reconstruction = reconstruction
input_degree = torch.arange(int(np.prod(input_shape))).view(input_shape).transpose(2, 1).reshape(-1)
# input_degree = torch.arange(int(np.prod(input_shape)))
# create masks
# use natural order as input order
masks, self.input_degrees = create_masks(
int(np.prod(input_shape)),
hidden_size,
num_hidden,
input_order=input_order,
input_degrees=torch.arange(int(np.prod(input_shape)))
input_degrees=input_degree,
input_order='sequential'
)
# setup activation
@@ -53,19 +56,23 @@ class MADE(nn.Module):
def forward(self, batch):
# MAF eq 4 -- return mean and log std
x = batch['observations']
x = x.view(x.shape[0], -1)
batch_size = batch['observations'].shape[0]
x = batch['observations'].reshape(batch_size, -1)
y = batch.get('y', None)
batch['pre_reconstructions'] = self.net(
self.input_layer(
x,
y
)
)
m, loga = self.net(self.input_layer(x, y)).chunk(chunks=2, dim=1)
batch = self.reconstruction(batch)
# these values should be normally distributed...
u = (x - m) * torch.exp(-loga)
# MAF eq 5
batch['u'] = u
batch['log_abs_det_jacobian'] = - loga
batch['reconstruction'] = m.reshape(batch_size, *self.input_shape)
batch = self.reconstruction(batch)
return batch
def inverse(self, u, y=None, sum_log_abs_det_jacobians=None):
......
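Tying the two files together: the quantities MADE.forward now stores in the batch (u, log_abs_det_jacobian, reconstruction) are exactly what the NLL reconstruction loss shown earlier consumes. A condensed restatement of that scoring step, assuming a standard normal base distribution as in the loss code:

import torch
from torch.distributions import Normal

def nll_scores(u, log_abs_det_jacobian):
    # MAF eq 5: log p(x) = sum_d [ log N(u_d; 0, 1) + log |det du/dx|_d ]
    log_proba = torch.sum(Normal(0.0, 1.0).log_prob(u) + log_abs_det_jacobian, dim=1)
    return -log_proba  # larger score = lower likelihood = more anomalous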
File mode changed from 100644 to 100755