Merged
17 changes: 9 additions & 8 deletions gpytorch/models/exact_gp.py
@@ -6,7 +6,7 @@
 import torch

 from .. import settings
-from ..distributions import MultivariateNormal
+from ..distributions import MultitaskMultivariateNormal, MultivariateNormal
 from ..likelihoods import _GaussianLikelihoodBase
 from ..utils.generic import length_safe_zip
 from ..utils.warnings import GPInputWarning
@@ -162,15 +162,17 @@ def get_fantasy_model(self, inputs, targets, **kwargs):

         model_batch_shape = self.train_inputs[0].shape[:-2]

-        if self.train_targets.dim() > len(model_batch_shape) + 1:
-            raise RuntimeError("Cannot yet add fantasy observations to multitask GPs, but this is coming soon!")
-
         if not isinstance(inputs, list):
             inputs = [inputs]

         inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in inputs]

-        target_batch_shape = targets.shape[:-1]
+        if not isinstance(self.prediction_strategy.train_prior_dist, MultitaskMultivariateNormal):
+            data_dim_start = -1
+        else:
+            data_dim_start = -2
+
+        target_batch_shape = targets.shape[:data_dim_start]
         input_batch_shape = inputs[0].shape[:-2]
         tbdim, ibdim = len(target_batch_shape), len(input_batch_shape)

@@ -198,7 +200,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
         # computing the covariance for each element of the batch. Therefore we don't expand the inputs to the
         # size of the fantasy model here - this is done below, after the evaluation and fast fantasy update
         train_inputs = [tin.expand(input_batch_shape + tin.shape[-2:]) for tin in self.train_inputs]
-        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[-1:])
+        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[data_dim_start:])

         full_inputs = [
             torch.cat(
@@ -208,8 +210,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
             for train_input, input in length_safe_zip(train_inputs, inputs)
         ]
         full_targets = torch.cat(
-            [train_targets, targets.expand(target_batch_shape + targets.shape[-1:])],
-            dim=-1,
+            [train_targets, targets.expand(target_batch_shape + targets.shape[data_dim_start:])], dim=data_dim_start
         )

         try:
27 changes: 24 additions & 3 deletions gpytorch/models/exact_prediction_strategies.py
@@ -22,6 +22,8 @@
 from torch import Tensor

 from .. import settings
+
+from ..distributions import MultitaskMultivariateNormal
 from ..lazy import LazyEvaluatedKernelTensor
 from ..utils.memoize import add_to_cache, cached, clear_cache_hook, pop_from_cache

@@ -134,16 +136,28 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
         A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have
         been added and all test-time caches have been updated.
         """
+        if not isinstance(full_output, MultitaskMultivariateNormal):
+            target_batch_shape = targets.shape[:-1]
+        else:
+            target_batch_shape = targets.shape[:-2]
+
         full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

         batch_shape = full_inputs[0].shape[:-2]

-        full_mean = full_mean.view(*batch_shape, -1)
         num_train = self.num_train

+        if isinstance(full_output, MultitaskMultivariateNormal):
+            num_tasks = full_output.event_shape[-1]
+            full_mean = full_mean.view(*batch_shape, -1, num_tasks)
+            fant_mean = full_mean[..., (num_train // num_tasks) :, :]
+            full_targets = full_targets.view(*target_batch_shape, -1)
+        else:
+            full_mean = full_mean.view(*batch_shape, -1)
+            fant_mean = full_mean[..., num_train:]
+
         # Evaluate fant x train and fant x fant covariance matrices, leave train x train unevaluated.
         fant_fant_covar = full_covar[..., num_train:, num_train:]
-        fant_mean = full_mean[..., num_train:]
         mvn = self.train_prior_dist.__class__(fant_mean, fant_fant_covar)
         fant_likelihood = self.likelihood.get_fantasy_likelihood(**kwargs)
         mvn_obs = fant_likelihood(mvn, inputs, **kwargs)
@@ -209,6 +223,9 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
             new_root = BatchRepeatLinearOperator(DenseLinearOperator(new_root), repeat_shape)
             # no need to repeat the covar cache, broadcasting will do the right thing

+        if isinstance(full_output, MultitaskMultivariateNormal):
+            full_mean = full_mean.view(*target_batch_shape, -1, num_tasks).contiguous()
Collaborator:
Instead of .view().contiguous(), you can also just use .reshape() here.
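
A minimal sketch of that suggestion (tensor contents and shapes are arbitrary, not from the PR):

import torch

x = torch.arange(24.0).reshape(4, 6)
a = x.view(2, 3, 4).contiguous()  # what the diff does: view, then force a contiguous layout
b = x.reshape(2, 3, 4)            # reshape returns a view when possible, copies only when needed
assert torch.equal(a, b)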


         # Create new DefaultPredictionStrategy object
         fant_strat = self.__class__(
             train_inputs=full_inputs,
@@ -285,7 +302,11 @@ def exact_predictive_mean(self, test_mean: Tensor, test_train_covar: LinearOpera
         # NOTE TO FUTURE SELF:
         # You **cannot** use addmv here, because test_train_covar may not actually be a non-lazy tensor even for an exact
         # GP, and using addmv requires you to to_dense test_train_covar, which is obviously a huge no-no!
-        res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
+
+        if len(self.mean_cache.shape) == 4:
Contributor Author:
The main issue I have with this PR is this line.

While working with a simple BO loop using BoTorch to test my code changes and observe the shapes of everything going through the code, I found that sometimes test_train_covar would be of size [5, 1, 4, 24] while self.mean_cache would be of size [5, 1, 1, 24]. This is why I have an if-statement here. I'm not sure whether there's a better way to check if we're working with a derivative-enabled GP in this function.

These observed shapes are also why my unit test uses new_x and new_y of shapes (1, 1, dim) and (num_fantasies, 1, 1, 1 + dim) respectively, since that's what I observed from my test code.
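
A minimal sketch of the shape behavior described above (random tensors standing in for the real caches):

import torch

test_train_covar = torch.randn(5, 1, 4, 24)
mean_cache = torch.randn(5, 1, 1, 24)

# Without the squeeze, matmul broadcasting misaligns the batch dims
# and produces cross-batch products:
bad = (test_train_covar @ mean_cache.unsqueeze(-1)).squeeze(-1)
print(bad.shape)  # torch.Size([5, 5, 1, 4])

# Dropping the singleton dim first lines the batch dims up as intended:
good = (test_train_covar @ mean_cache.squeeze(1).unsqueeze(-1)).squeeze(-1)
print(good.shape)  # torch.Size([5, 1, 4])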

Contributor Author:
I'm curious if anyone has any thoughts about this!

Collaborator:
> I found that sometimes test_train_covar would be of size [5,1,4,24] and self.mean_cache would be of size [5,1,1,24]

Do you have a sense for why this is? Is this some insufficient invalidation of self.mean_cache? Figured I'd ask first before getting into a rabbit hole here...

Contributor Author:
Honestly I'm not 100% sure, but self.mean_cache appears to be set to size [5, 1, 1, 24] from a call at exact_gp.py:239 when the new prediction_strategy is created.

This is called from get_fantasy_model(self, inputs, targets), where:

# The below are torch.Tensors; I just show their dimensions
inputs = [torch.Size([1, 1, 3])]
targets = torch.Size([5, 1, 1, 4])

And old_pred_strat.get_fantasy_strategy(inputs, targets, full_inputs, full_targets, full_output) is called with:

# The below are torch.Tensors; I just show their dimensions
inputs = [torch.Size([1, 1, 3])]
targets = torch.Size([5, 1, 1, 4])
full_inputs = [torch.Size([1, 6, 3])]
full_targets = torch.Size([5, 1, 6, 4])

full_output = MultitaskMultivariateNormal(loc: torch.Size([1, 24]))

The targets come from model.py:335 in BoTorch, from a call to a sampler. I don't specify a sampler in my code; I just use whatever the default sampler is in qKnowledgeGradient:

scal_transf = ScalarizedPosteriorTransform(weights=torch.tensor([1.0] + [0.0] * dim, dtype=torch.double))

# Define qKG acquisition function
qKG = qKnowledgeGradient(
    model,
    posterior_transform=scal_transf,
    num_fantasies=5,
)

Hopefully this helps! I'm not sure what the expected behavior should be, but please let me know how I can help.
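
For reference, a small sketch of how the patched get_fantasy_model slices the batch shape out of the targets quoted above (shapes only; values are arbitrary):

import torch

targets = torch.randn(5, 1, 1, 4)  # (num_fantasies, batch, n_new, num_tasks)

# Multitask case in the patched get_fantasy_model: the trailing dims are
# (data, task), so the batch shape stops two dims from the end.
data_dim_start = -2
target_batch_shape = targets.shape[:data_dim_start]
print(target_batch_shape)  # torch.Size([5, 1])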

Member:
I wonder if the extra 1 appearing is just a soft incompatibility we never noticed, where BoTorch requires an explicit task dim for the labels but we don't in gpytorch. Indeed, my default is usually to have a single-dim label vector, so when writing the code something like this could have slipped by me.
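
A shape-only illustration of that hypothesis (hypothetical tensors, not from the PR):

import torch

n, num_tasks = 15, 2

y_single_dim = torch.randn(n)                         # typical single-output GPyTorch labels: (n,)
y_task_dim = torch.randn(n, num_tasks)                # GPyTorch multitask labels: (n, t)
y_botorch_fantasy = torch.randn(5, 1, 1, num_tasks)   # what BoTorch passed: an extra singleton batch dim

# Squeezing that dim recovers the shape GPyTorch's caches broadcast against
print(y_botorch_fantasy.squeeze(1).shape)  # torch.Size([5, 1, 2])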

+            res = (test_train_covar @ self.mean_cache.squeeze(1).unsqueeze(-1)).squeeze(-1)
+        else:
+            res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
         res = res + test_mean

         return res
62 changes: 62 additions & 0 deletions test/examples/test_derivative_gp_fantasy.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3

import unittest
from math import pi

import torch

import gpytorch
from gpytorch.distributions import MultitaskMultivariateNormal
from gpytorch.kernels import RBFKernelGrad, ScaleKernel
from gpytorch.likelihoods import MultitaskGaussianLikelihood
from gpytorch.means import ConstantMeanGrad
from gpytorch.test.base_test_case import BaseTestCase

# Simple training data
num_train_samples = 15
num_fantasies = 10
dim = 1
train_X = torch.linspace(0, 1, num_train_samples).reshape(-1, 1)
train_Y = torch.hstack([
    torch.sin(train_X * (2 * pi)).reshape(-1, 1),
    (2 * pi) * torch.cos(train_X * (2 * pi)).reshape(-1, 1),
])


class GPWithDerivatives(gpytorch.models.ExactGP):
    def __init__(self, train_X, train_Y):
        likelihood = MultitaskGaussianLikelihood(num_tasks=1 + dim)
        super().__init__(train_X, train_Y, likelihood)
        self.mean_module = ConstantMeanGrad()
        self.base_kernel = RBFKernelGrad()
        self.covar_module = ScaleKernel(self.base_kernel)
        self._num_outputs = 1 + dim

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultitaskMultivariateNormal(mean_x, covar_x)


class TestDerivativeGPFutures(BaseTestCase, unittest.TestCase):

    # Inspired by test_lanczos_fantasy_model
    def test_derivative_gp_futures(self):
        model = GPWithDerivatives(train_X, train_Y)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)

        mll.train()
        mll.eval()

        # get a posterior to fill in caches
        model(torch.randn(num_train_samples).reshape(-1, 1))

        new_x = torch.randn((1, 1, dim))
        new_y = torch.randn((num_fantasies, 1, 1, 1 + dim))

        # just check that this can run without error
        model.get_fantasy_model(new_x, new_y)


if __name__ == "__main__":
    unittest.main()