SV-DKL with Pyro

This notebook demonstrates stochastic variational deep kernel learning (SV-DKL): a neural network feature extractor feeding a grid-interpolation variational GP, trained end-to-end with Pyro's SVI on the UCI 3droad dataset.

In [1]:
import math
import torch
import gpytorch
import pyro
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline
In [2]:
import urllib.request
import os.path
from scipy.io import loadmat
from math import floor

if not os.path.isfile('3droad.mat'):
    print('Downloading \'3droad\' UCI dataset...')
    urllib.request.urlretrieve('https://www.dropbox.com/s/f6ow1i59oqx05pl/3droad.mat?dl=1', '3droad.mat')

data = torch.Tensor(loadmat('3droad.mat')['data'])
X = data[:, :-1]
X = X - X.min(0)[0]
X = 2 * (X / X.max(0)[0]) - 1
y = data[:, -1]

# Use the first 80% of the data for training, and the last 20% for testing.
train_n = int(floor(0.8*len(X)))

train_x = X[:train_n, :].contiguous().cuda()
train_y = y[:train_n].contiguous().cuda()

test_x = X[train_n:, :].contiguous().cuda()
test_y = y[train_n:].contiguous().cuda()
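The inputs are min-max scaled to [-1, 1], a standard preprocessing step for DKL models (the extracted features are separately rescaled to the grid bounds later on). A quick sanity check that the scaling did what we expect (a minimal, optional sketch):

In [ ]:
# Optional sanity check: after the scaling above, every input dimension
# should span exactly [-1, 1], and the split should be roughly 80/20.
print(X.min(0)[0], X.max(0)[0])   # expect all -1s and all 1s
print(train_x.shape, test_x.shape)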
In [3]:
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
In [4]:
data_dim = train_x.size(-1)

# A feed-forward network that compresses the raw inputs down to a low-dimensional feature space
class LargeFeatureExtractor(torch.nn.Sequential):
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
        self.add_module('bn1', torch.nn.BatchNorm1d(1000))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(1000, 500))
        self.add_module('bn2', torch.nn.BatchNorm1d(500))
        self.add_module('relu2', torch.nn.ReLU())
        self.add_module('linear3', torch.nn.Linear(500, 50))
        self.add_module('bn3', torch.nn.BatchNorm1d(50))
        self.add_module('relu3', torch.nn.ReLU())
        self.add_module('linear4', torch.nn.Linear(50, 2))

feature_extractor = LargeFeatureExtractor().cuda()
# num_features is the number of final features extracted by the neural network, in this case 2.
num_features = 2
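Before wiring the extractor into a GP, it can be worth confirming that it really maps batches down to num_features dimensions (a minimal smoke test; the dummy batch below is arbitrary):

In [ ]:
# Hypothetical smoke test: push a random dummy batch through the untrained extractor.
# The BatchNorm layers need more than one sample per batch, hence a batch of 8.
with torch.no_grad():
    dummy = torch.randn(8, data_dim).cuda()
    print(feature_extractor(dummy).shape)  # expect torch.Size([8, 2])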
In [5]:
from gpytorch.models import PyroVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution, GridInterpolationVariationalStrategy


class PyroSVDKLGridInterpModel(PyroVariationalGP):
    def __init__(self, likelihood, grid_size=32, grid_bounds=[(-1, 1), (-1, 1)], name_prefix="svdkl_grid_example"):
        variational_distribution = CholeskyVariationalDistribution(num_inducing_points=(grid_size ** num_features))
        variational_strategy = GridInterpolationVariationalStrategy(self,
                                                                    grid_size=grid_size,
                                                                    grid_bounds=grid_bounds,
                                                                    variational_distribution=variational_distribution)
        super(PyroSVDKLGridInterpModel, self).__init__(variational_strategy,
                                                       likelihood,
                                                       num_data=train_y.numel(),
                                                       name_prefix=name_prefix)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(
            lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1., sigma=0.1, log_transform=True)
        ))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
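With grid_size=32 and two extracted features, the variational distribution covers grid_size ** num_features = 32² = 1024 inducing values on a fixed grid. GridInterpolationVariationalStrategy interpolates each minibatch against this grid rather than learning free-form inducing point locations, which is what keeps SV-DKL cheap in the low-dimensional feature space.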
In [6]:
class DKLModel(gpytorch.Module):
    def __init__(self, likelihood, feature_extractor, num_features, grid_bounds=(-1., 1.)):
        super(DKLModel, self).__init__()
        self.feature_extractor = feature_extractor
        self.gp_layer = PyroSVDKLGridInterpModel(likelihood)
        self.grid_bounds = grid_bounds
        self.num_features = num_features

    def features(self, x):
        features = self.feature_extractor(x)
        features = gpytorch.utils.grid.scale_to_bounds(features, self.grid_bounds[0], self.grid_bounds[1])
        return features

    def forward(self, x):
        res = self.gp_layer(self.features(x))
        return res

    def guide(self, x, y):
        self.gp_layer.guide(self.features(x), y)

    def model(self, x, y):
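        # Register the feature extractor's parameters with Pyro so SVI trains them jointly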
        pyro.module(self.gp_layer.name_prefix + ".feature_extractor", self.feature_extractor)
        self.gp_layer.model(self.features(x), y)


likelihood = gpytorch.likelihoods.GaussianLikelihood().cuda()
model = DKLModel(likelihood, feature_extractor, num_features=num_features).cuda()
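Note the division of labor here: DKLModel.model registers the neural network via pyro.module, so SVI optimizes its weights jointly with the variational parameters, while the guide only needs to delegate to the GP layer's guide, since the feature extractor itself is deterministic.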
In [7]:
from pyro import optim
from pyro import infer

optimizer = optim.Adam({"lr": 0.1})

elbo = infer.Trace_ELBO(num_particles=256, vectorize_particles=True)
svi = infer.SVI(model.model, model.guide, optimizer, elbo)
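Trace_ELBO with num_particles=256 and vectorize_particles=True estimates the ELBO from 256 samples evaluated in one vectorized pass, which reduces gradient variance at the cost of memory. If GPU memory is tight, a smaller particle count is a reasonable trade (a sketch, not tuned):

In [ ]:
# Lower-memory alternative (not run here): fewer ELBO particles mean
# noisier gradient estimates but a smaller memory footprint per step.
# elbo = infer.Trace_ELBO(num_particles=16, vectorize_particles=True)
# svi = infer.SVI(model.model, model.guide, optimizer, elbo)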
In [8]:
num_epochs = 3
# Not enough for this model to converge, but enough for a fast example

for i in range(num_epochs):
    # Within each epoch, iterate over the minibatches of training data
    for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):
        loss = svi.step(x_batch, y_batch)
    # Report the loss from the final minibatch of the epoch
    print('Epoch {}   Loss {}'.format(i + 1, loss))
Epoch 1   Loss 1361756.0124511719
Epoch 2   Loss 1443523.3786621094
Epoch 3   Loss 1332343.6667480469
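The printed value is the (negative) ELBO estimate from the last minibatch of each epoch. Its raw magnitude is large because the objective is rescaled toward the full dataset size via the num_data argument passed to the model, and because the targets are left unnormalized; the trend across epochs matters more than the absolute value.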
In [9]:
model.eval()
likelihood.eval()
with torch.no_grad():
    preds = model(test_x)
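Evaluating all of the test points in a single forward pass is fine here, but for larger test sets it may be safer to predict in minibatches (a minimal sketch, reusing the DataLoader machinery from above):

In [ ]:
# Optional: minibatched prediction to bound GPU memory usage.
test_loader = DataLoader(TensorDataset(test_x, test_y), batch_size=1024, shuffle=False)
with torch.no_grad():
    means = torch.cat([model(x_batch).mean.cpu() for x_batch, _ in test_loader])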
In [10]:
print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
Test MAE: 10.290990829467773
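Since the targets were never normalized, this MAE is in the raw units of the dataset's final column, and after only three epochs it should be read as a smoke-test number rather than a converged benchmark result.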