# Scalable GP Regression in 1D (w/ SVGP)

This example shows how to do stochastic variational GP regression with an ApproximateGP using a VariationalStrategy module while learning the inducing point locations.

[1]:

import math
import torch
import gpytorch
from matplotlib import pyplot as plt
from math import exp

%matplotlib inline

/home/gpleiss/anaconda3/envs/gpytorch/lib/python3.7/site-packages/matplotlib/__init__.py:999: UserWarning: Duplicate key in file "/home/gpleiss/.dotfiles/matplotlib/matplotlibrc", line #57
(fname, cnt))

[2]:

# Training data: 260 evenly spaced inputs on [0, 1]; targets are one period
# of a cosine corrupted by Gaussian noise (std 0.1).
train_x = torch.linspace(0, 1, 260)
train_y = torch.cos(2 * math.pi * train_x) + 0.1 * torch.randn(train_x.size(0))

[3]:

from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
class SVGPRegressionModel(ApproximateGP):
    """SVGP regression model with learnable inducing point locations.

    Uses a Cholesky-parameterized variational distribution over the inducing
    values, a constant mean, and a scaled RBF kernel.
    """

    def __init__(self, inducing_points):
        # One inducing value per inducing point location.
        variational_distribution = CholeskyVariationalDistribution(inducing_points.size(-1))
        # learn_inducing_locations=True registers the inducing points as
        # parameters so the optimizer can move them during training.
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        """Return the latent GP prior as a MultivariateNormal at inputs x."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Initialize the inducing points so they evenly span the range of train_x.
num_inducing = 25
inducing_points = torch.linspace(0, 1, num_inducing)
model = SVGPRegressionModel(inducing_points)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

[4]:

from gpytorch.mlls.variational_elbo import VariationalELBO

# Find optimal model hyperparameters
model.train()
likelihood.train()

# NOTE(review): the optimizer construction line was lost in the notebook
# export (the parameter-group list was dangling); reconstructed here.
# Model and likelihood hyperparameters are optimized jointly with Adam.
optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood (variational ELBO).
# train_y.size(0) is the number of training datapoints; combine_terms=False
# makes mll return (log-likelihood, KL divergence, log-prior) separately.
mll = VariationalELBO(likelihood, model, train_y.size(0), combine_terms=False)

def train(num_iter=200):
    """Run num_iter Adam steps on the negative ELBO.

    num_iter defaults to the original hard-coded 200 iterations.
    """
    for i in range(num_iter):
        # Bug fix: gradients must be zeroed each step, otherwise they
        # accumulate across iterations and corrupt the updates.
        optimizer.zero_grad()
        output = model(train_x)
        # Calc loss and backprop gradients
        log_lik, kl_div, log_prior = mll(output, train_y)
        loss = -(log_lik - kl_div + log_prior)
        loss.backward()
        if i % 50 == 0:
            print('Iter %d - Loss: %.3f [%.3f, %.3f, %.3f]' % (
                i + 1, loss.item(), log_lik.item(), kl_div.item(), log_prior.item()))
        optimizer.step()

# Get clock time
%time train()

Iter 1 - Loss: 1.495 [-1.487, 0.008, 0.000]
Iter 51 - Loss: 0.926 [-0.873, 0.054, 0.000]
Iter 101 - Loss: 0.573 [-0.447, 0.127, 0.000]
Iter 151 - Loss: 0.380 [-0.116, 0.264, 0.000]
CPU times: user 18.9 s, sys: 19.9 s, total: 38.9 s
Wall time: 5.57 s

[5]:


# Bug fix: switch to eval mode before predicting — the original cell left the
# model/likelihood in train mode, which changes the predictive behavior.
model.eval()
likelihood.eval()

test_x = torch.linspace(0, 1, 51)
# Bug fix: wrap prediction in no_grad so no autograd graph is built.
with torch.no_grad():
    observed_pred = likelihood(model(test_x))
    # Get upper and lower confidence bounds (mean +/- 2 stddev)
    lower, upper = observed_pred.confidence_region()

# Initialize plot
f, ax = plt.subplots(1, 1, figsize=(4, 3))

# Plot training data as black stars
ax.plot(train_x.numpy(), train_y.numpy(), 'k*')
# Plot predictive means as blue line
ax.plot(test_x.numpy(), observed_pred.mean.numpy(), 'b')
# Shade between the lower and upper confidence bounds
ax.fill_between(test_x.numpy(), lower.numpy(), upper.numpy(), alpha=0.5)
ax.set_ylim([-3, 3])
ax.legend(['Observed Data', 'Mean', 'Confidence'])

[ ]: