[1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 2

Set up training data

In the next cell, we set up the training data for this example. We’ll be using 100 regularly spaced points on [0,1], at which we evaluate each function and add Gaussian noise to get the training labels.

We’ll have two functions: a sine function (y1) and a cosine function (y2).

[2]:
train_x = torch.linspace(0, 1, 100)

train_y1 = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
train_y2 = torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2

Set up the model

The model should be somewhat similar to the ExactGP model in the simple regression example.

The differences:

  1. The model takes two inputs: the data (x) and the task indices (i). The indices indicate which task we want an output for.
  2. Rather than using just an RBFKernel, we use it in conjunction with an IndexKernel; the two combine as a product, as sketched below.
  3. We don’t use a ScaleKernel, since the IndexKernel will do some scaling for us. (This way we’re not overparameterizing the kernel.)
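
Concretely, the covariance between two (input, task) pairs factors as a product of a data kernel and a task kernel, which is what the forward method below computes:

k((x, i), (x′, j)) = k_RBF(x, x′) · k_task(i, j)

Here k_task(i, j) = (BBᵀ + diag(v))_ij is the low-rank-plus-diagonal matrix that GPyTorch’s IndexKernel parameterizes, with B of shape num_tasks × rank.
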
[3]:
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.RBFKernel()

        # We learn an IndexKernel for 2 tasks
        # (so we'll actually learn a full 2x2 task covariance matrix, including the inter-task correlation)
        self.task_covar_module = gpytorch.kernels.IndexKernel(num_tasks=2, rank=1)

    def forward(self, x, i):
        mean_x = self.mean_module(x)

        # Get input-input covariance
        covar_x = self.covar_module(x)
        # Get task-task covariance
        covar_i = self.task_covar_module(i)
        # Take the elementwise (Hadamard) product of the two covariances
        covar = covar_x.mul(covar_i)

        return gpytorch.distributions.MultivariateNormal(mean_x, covar)

likelihood = gpytorch.likelihoods.GaussianLikelihood()

# Here we want outputs for every input and task
# This is not the most efficient model for this: it's better to use the model in the ./Multitask_GP_Regression.ipynb notebook
# Since we are learning two tasks, we feed in the x data twice,
# along with the y data and its task indices
train_i_task1 = torch.full_like(train_x, dtype=torch.long, fill_value=0)
train_i_task2 = torch.full_like(train_x, dtype=torch.long, fill_value=1)

full_train_x = torch.cat([train_x, train_x])
full_train_i = torch.cat([train_i_task1, train_i_task2])
full_train_y = torch.cat([train_y1, train_y2])

# Here we have two items that we're passing in as train_inputs
model = MultitaskGPModel((full_train_x, full_train_i), full_train_y, likelihood)
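
As an optional sanity check (the assertions below are illustrative, not part of the original notebook), each stacked tensor should contain 200 entries: 100 training points per task, over 2 tasks.

# Optional sanity check: 100 points per task, stacked over 2 tasks
assert full_train_x.shape == (200,)
assert full_train_i.shape == (200,)
assert full_train_y.shape == (200,)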

Training the model

In the next cell, we handle using Type-II MLE to train the hyperparameters of the Gaussian process.

See the simple regression example for more info on this step.

[4]:
# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.parameters()},  # Includes GaussianLikelihood parameters
], lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for i in range(50):
    optimizer.zero_grad()
    output = model(full_train_x, full_train_i)
    loss = -mll(output, full_train_y)
    loss.backward()
    print('Iter %d/50 - Loss: %.3f' % (i + 1, loss.item()))
    optimizer.step()
Iter 1/50 - Loss: 1.119
Iter 2/50 - Loss: 1.071
Iter 3/50 - Loss: 1.019
Iter 4/50 - Loss: 0.961
Iter 5/50 - Loss: 0.905
Iter 6/50 - Loss: 0.844
Iter 7/50 - Loss: 0.788
Iter 8/50 - Loss: 0.731
Iter 9/50 - Loss: 0.670
Iter 10/50 - Loss: 0.626
Iter 11/50 - Loss: 0.580
Iter 12/50 - Loss: 0.546
Iter 13/50 - Loss: 0.491
Iter 14/50 - Loss: 0.471
Iter 15/50 - Loss: 0.433
Iter 16/50 - Loss: 0.373
Iter 17/50 - Loss: 0.357
Iter 18/50 - Loss: 0.319
Iter 19/50 - Loss: 0.288
Iter 20/50 - Loss: 0.251
Iter 21/50 - Loss: 0.217
Iter 22/50 - Loss: 0.166
Iter 23/50 - Loss: 0.150
Iter 24/50 - Loss: 0.114
Iter 25/50 - Loss: 0.095
Iter 26/50 - Loss: 0.075
Iter 27/50 - Loss: 0.046
Iter 28/50 - Loss: 0.016
Iter 29/50 - Loss: -0.003
Iter 30/50 - Loss: -0.018
Iter 31/50 - Loss: -0.023
Iter 32/50 - Loss: -0.019
Iter 33/50 - Loss: -0.022
Iter 34/50 - Loss: -0.024
Iter 35/50 - Loss: -0.048
Iter 36/50 - Loss: -0.043
Iter 37/50 - Loss: -0.040
Iter 38/50 - Loss: -0.027
Iter 39/50 - Loss: -0.027
Iter 40/50 - Loss: -0.029
Iter 41/50 - Loss: -0.013
Iter 42/50 - Loss: -0.014
Iter 43/50 - Loss: -0.031
Iter 44/50 - Loss: 0.015
Iter 45/50 - Loss: -0.014
Iter 46/50 - Loss: -0.031
Iter 47/50 - Loss: -0.035
Iter 48/50 - Loss: -0.021
Iter 49/50 - Loss: -0.025
Iter 50/50 - Loss: -0.030
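
Once training has converged, it can be instructive to inspect the learned 2x2 task covariance. A minimal sketch, assuming the covar_matrix property of IndexKernel (depending on your GPyTorch version it returns a lazy tensor or LinearOperator, hence the hedged conversion):

# Sketch: inspect the learned task covariance B Bᵀ + diag(v)
with torch.no_grad():
    task_covar = model.task_covar_module.covar_matrix
    # Convert the lazy representation to a plain 2x2 tensor;
    # newer GPyTorch versions expose to_dense(), older ones evaluate()
    dense = task_covar.to_dense() if hasattr(task_covar, "to_dense") else task_covar.evaluate()
    print(dense)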

Make predictions with the model

[5]:
# Set into eval mode
model.eval()
likelihood.eval()

# Initialize plots
f, (y1_ax, y2_ax) = plt.subplots(1, 2, figsize=(8, 3))

# Test points every 0.02 in [0,1]
test_x = torch.linspace(0, 1, 51)
test_i_task1 = torch.full_like(test_x, dtype=torch.long, fill_value=0)
test_i_task2 = torch.full_like(test_x, dtype=torch.long, fill_value=1)

# Make predictions - one task at a time
# We control the task we care about using the indices

# The gpytorch.settings.fast_pred_var flag activates LOVE (for fast variances)
# See https://arxiv.org/abs/1803.06058
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    observed_pred_y1 = likelihood(model(test_x, test_i_task1))
    observed_pred_y2 = likelihood(model(test_x, test_i_task2))

# Define plotting function
def ax_plot(ax, train_y, rand_var, title):
    # Get lower and upper confidence bounds
    lower, upper = rand_var.confidence_region()
    # Plot training data as black stars
    ax.plot(train_x.detach().numpy(), train_y.detach().numpy(), 'k*')
    # Predictive mean as blue line
    ax.plot(test_x.detach().numpy(), rand_var.mean.detach().numpy(), 'b')
    # Shade in confidence
    ax.fill_between(test_x.detach().numpy(), lower.detach().numpy(), upper.detach().numpy(), alpha=0.5)
    ax.set_ylim([-3, 3])
    ax.legend(['Observed Data', 'Mean', 'Confidence'])
    ax.set_title(title)

# Plot both tasks
ax_plot(y1_ax, train_y1, observed_pred_y1, 'Observed Values (Likelihood)')
ax_plot(y2_ax, train_y2, observed_pred_y2, 'Observed Values (Likelihood)')
[Figure: two panels, one per task, showing the observed data (black stars), predictive mean (blue line), and shaded confidence region]
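
If you run this example as a plain Python script (where %matplotlib inline is unavailable), an explicit call is needed to render the figure:

plt.show()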