From d45adc3b9c993fbafa49d4a60c5ff143d92e287a Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 5 Oct 2024 17:21:28 +0200 Subject: [PATCH 01/39] Change all newlines to LF, add basics to gitignore and get running --- .gitignore | 41 +++++++++++++++++++ stpy/helpers/ellipsoid_algorithms.py | 3 +- .../point_processes/poisson_rate_estimator.py | 28 ++----------- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 5d34afb..6fa4e72 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,44 @@ sensepy/__pycache__ *.txt *.png *.pdf +*.pyo +*.pyd +*.pdb +*.egg +*.egg-info +*.whl +*.manifest +*.spec +*.log +*.pot +*.mo +*.so +*.dll +*.dylib +*.a +*.lib +*.swp +*.swo +*.tmp +*.bak +*.old +*.orig +*.rej +*.sublime-project +*.sublime-workspace +*.project +*.pydevproject +*.idea/ +.vscode/ +__pycache__/ +*.coverage +.coverage.* +.cache +.tox/ +.nox/ +.pytest_cache/ +htmlcov/ +dist/ +build/ +site/ +docs/_build/ diff --git a/stpy/helpers/ellipsoid_algorithms.py b/stpy/helpers/ellipsoid_algorithms.py index 112d077..a67ed38 100644 --- a/stpy/helpers/ellipsoid_algorithms.py +++ b/stpy/helpers/ellipsoid_algorithms.py @@ -208,8 +208,7 @@ def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u): constraints.append(Lambda @ theta >= l) constraints.append(Lambda @ theta <= u) prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.MOSEK, verbose=False - , mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual}) + prob.solve(solver=cp.SCS, verbose=True) val = prob.value theta = theta.value return val, theta diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index d91658e..dbbb1c5 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -1026,12 +1026,7 @@ def penalized_likelihood(self, threads=4): theta.value = self.rate.numpy() try: - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-4, - mosek.dparam.intpnt_co_tol_dfeas: 1e-4, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-4}) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) self.rate = torch.from_numpy(theta.value) return self.rate @@ -1060,12 +1055,7 @@ def penalized_likelihood_integral(self, threads=4): # theta.value = self.rate.numpy() try: prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-4, - mosek.dparam.intpnt_co_tol_dfeas: 1e-4, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-4}) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) self.rate = torch.from_numpy(theta.value) except: print("Optimization failed. 
Using the old value.") @@ -1282,12 +1272,7 @@ def penalized_likelihood_bins(self, threads=4): + self.s * 0.5 * cp.sum_squares(theta)) prob = cp.Problem(objective, constraints) try: - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-8, - mosek.dparam.intpnt_co_tol_dfeas: 1e-8, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-8}) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) self.rate = torch.from_numpy(theta.value) except: @@ -1314,12 +1299,7 @@ def penalized_likelihood_integral_bins(self, threads=4): prob = cp.Problem(objective, constraints) else: prob = cp.Problem(objective) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) self.rate = torch.from_numpy(theta.value) except: print("Optimization failed. Using the old value.") From cb56a1d5cc69c360446b3a30c825a28e838157f7 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 10 Oct 2024 17:21:35 +0200 Subject: [PATCH 02/39] some comments --- stpy/embeddings/embedding.py | 6 ++++ stpy/embeddings/positive_embedding.py | 38 +++++++++++++++++++++++++- stpy/point_processes/rate_estimator.py | 23 ++++++++++++++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/stpy/embeddings/embedding.py b/stpy/embeddings/embedding.py index f8a5394..bdbafbb 100755 --- a/stpy/embeddings/embedding.py +++ b/stpy/embeddings/embedding.py @@ -118,6 +118,12 @@ def get_m(self): return self.m def integral(self, S): + """ + Compute the integral of the kernel over the set S + + :param S: Borel set + :return: array of length self.m of integrals of each basis function over the set S + """ a = S.bounds[:, 0] b = S.bounds[:, 1] psi = torch.zeros(self.m).double() diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index 7899ad0..2e41df1 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -1,7 +1,9 @@ +from typing import Optional import cvxpy as cp import mosek import numpy as np import scipy +from stpy.kernels import KernelFunction import torch from stpy.borel_set import BorelSet @@ -11,11 +13,28 @@ class PositiveEmbedding(Embedding): - def __init__(self, d, m, kernel_object=None, interval=(-1, 1), B=1, b=0, s=0.001, offset=0.): + def __init__(self, d, m, kernel_object: Optional[KernelFunction]=None, interval=(-1, 1), B=1, b=0, s=0.001, offset=0.): + """ + + Parameters + ---------- + d + Dimension of the embedding + m + Number of basis functions + b, optional + Minimal value of the rate function, by default 0 + B, optional + Maximal value of the rate function, by default 1 + """ self.d = d + """ Dimension of the embedding """ self.m = m + """ Number of basis functions """ self.b = b + """ Minimal value of the rate function """ self.size = self.get_m() + """ Number of basis functions times number of dimensions """ self.interval = interval if kernel_object is None: #self.kernel_object = KernelFunction() @@ -42,6 +61,13 @@ def integral(self, S): pass def basis_fun(self, x, j): + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of 
hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ pass def get_constraints(self): @@ -52,6 +78,13 @@ def get_constraints(self): return (l, Lambda, u) def cov(self, inverse=False): + r"""Should return $\Gamma^T = \sqrt{V^{-1} K V^{-1}}^T$ + + $\sqrt{(V^TV)^* \cdot K}$ where $V_{ij} = \phi_i(t_j)$ and + $K_{ij} = k(t_i, t_j)$ and the $t_i$ are equally spaced grid points + in the cartesian product set $i^d$ where i is `self.interval` + + """ if self.precomp == False: dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m t = self.interval[0] + torch.linspace(0, self.m - 1, self.m) * dm @@ -83,6 +116,8 @@ def cov(self, inverse=False): return self.Gamma_half def embed_internal(self, x): + """ Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$. + """ if self.d == 1: out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) for j in range(self.m): @@ -146,6 +181,7 @@ def fit(self, x, y, already_embeded=False): return mode def embed(self, x): + r"""Calculates $\Phi(x)^T = \phi(x)^T \Gamma^T$""" Gamma_half = self.cov() return self.embed_internal(x) @ Gamma_half diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index 016661f..e0b0927 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -1,3 +1,4 @@ +from typing import List import numpy as np import torch @@ -15,7 +16,26 @@ def get_min_max(self): return (np.min(volumes), np.max(volumes)) - def load_data(self, data, times=True): + def load_data(self, data: List, times=True): + r"""Load the data and save $\Phi(x)$ into `self.observations`, $n(A_i)$ in + `self.counts` and $\int_{A_i} \phi_j(x) dx$ into `self.phis` + + + Parameters + ---------- + data + + List of samples, where each sample is a tuple of + + * The Borel set on which the data lies + * A tensor of the datapoints them selves i.e. of shape + [num_data_points, self.d...] + * The amount of time in minutes that the data spans + i.e. 
max time - min time of all data points + + times, optional + by default True + """ self.approx_fit = False if len(data) > 0: @@ -35,7 +55,6 @@ def load_data(self, data, times=True): if obs is not None: obs, _, duplicates = torch.unique(obs, dim=0, return_inverse=True, return_counts=True) - #obs = torch.diag(torch.exp(duplicates.double()))@obs\ obs = torch.einsum('ij,i->ij', obs, duplicates) if times == True: From b4d641b36603980ae6533165e0a331ad37650423 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Fri, 11 Oct 2024 09:09:22 +0200 Subject: [PATCH 03/39] Upgrade libraries, remove .egg-info file --- setup.py | 3 +-- stpy.egg-info/PKG-INFO | 8 -------- stpy.egg-info/not-zip-safe | 1 - stpy/continuous_processes/gauss_procc.py | 2 -- 4 files changed, 1 insertion(+), 13 deletions(-) delete mode 100644 stpy.egg-info/PKG-INFO delete mode 100644 stpy.egg-info/not-zip-safe diff --git a/setup.py b/setup.py index 07cd061..060143a 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ 'numpy', 'scipy', 'matplotlib', - 'sklearn', + 'scikit-learn', 'tensorflow', 'cvxpy', 'torch', @@ -13,7 +13,6 @@ 'mosek', 'quadprog', 'cvxpylayers', - 'functorch', 'autograd_minimize' ] # diff --git a/stpy.egg-info/PKG-INFO b/stpy.egg-info/PKG-INFO deleted file mode 100644 index b9cb176..0000000 --- a/stpy.egg-info/PKG-INFO +++ /dev/null @@ -1,8 +0,0 @@ -Metadata-Version: 2.1 -Name: stpy -Version: 0.0.2 -Summary: Stochastic Process Library for Python -Home-page: -Author: Mojmir Mutny -Author-email: mojmir.mutny@inf.ethz.ch -License: custom diff --git a/stpy.egg-info/not-zip-safe b/stpy.egg-info/not-zip-safe deleted file mode 100644 index 8b13789..0000000 --- a/stpy.egg-info/not-zip-safe +++ /dev/null @@ -1 +0,0 @@ - diff --git a/stpy/continuous_processes/gauss_procc.py b/stpy/continuous_processes/gauss_procc.py index d7379ff..b0af70e 100755 --- a/stpy/continuous_processes/gauss_procc.py +++ b/stpy/continuous_processes/gauss_procc.py @@ -4,8 +4,6 @@ import scipy as scipy import torch from cvxpylayers.torch import CvxpyLayer -#from functorch import hessian -import functorch from pymanopt.manifolds import Euclidean, Stiefel, PSDFixedRank from torch.autograd import grad from torchmin import minimize as minimize_torch From 9e64b58a4e914d24c6afdfe51fbe656560650c86 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Fri, 11 Oct 2024 10:28:12 +0200 Subject: [PATCH 04/39] format code with black --- setup.py | 48 +- stpy/approx_inference/expected-propagation.py | 98 +- stpy/approx_inference/hmc.py | 12 +- stpy/approx_inference/langevin.py | 42 +- stpy/approx_inference/proximal_langevin.py | 29 +- stpy/approx_inference/sampling_helper.py | 82 +- stpy/approx_inference/variational_mf.py | 1426 +++--- stpy/borel_set.py | 585 +-- stpy/candidate_set.py | 107 +- .../categorical_mixture.py | 351 +- stpy/continuous_processes/convex_rkhs.py | 134 +- .../continuous_processes/dirichlet_mixture.py | 230 +- stpy/continuous_processes/fourier_fea.py | 1144 +++-- stpy/continuous_processes/ga_process.py | 436 +- stpy/continuous_processes/gauss_procc.py | 2644 +++++----- .../kernelized_features.py | 1232 ++--- .../kernelized_features_old.py | 1374 ++--- stpy/continuous_processes/mkl_estimator.py | 470 +- stpy/continuous_processes/mkl_features.py | 370 +- stpy/continuous_processes/nystrom_fea.py | 724 +-- stpy/continuous_processes/primal_mkl.py | 399 +- stpy/continuous_processes/trace_features.py | 212 +- .../truncated_kernelized_features.py | 147 +- stpy/dimred/sri.py | 240 +- stpy/embeddings/bernstein_embedding.py | 753 +-- 
stpy/embeddings/bump_bases.py | 746 +-- stpy/embeddings/embedding.py | 1396 +++--- stpy/embeddings/onehot_embedding.py | 42 +- stpy/embeddings/optimal_positive_basis.py | 365 +- stpy/embeddings/packing_embedding.py | 189 +- stpy/embeddings/polynomial_embedding.py | 345 +- stpy/embeddings/positive_embedding.py | 431 +- stpy/embeddings/random_nn.py | 328 +- stpy/embeddings/transformations.py | 112 +- stpy/embeddings/weighted_embedding.py | 11 +- stpy/estimator.py | 1518 +++--- stpy/feature_importance/feature_ranker.py | 84 +- .../conditional_generative_model.py | 7 +- stpy/generative_models/cvae.py | 82 +- stpy/generative_models/generative_sampler.py | 5 +- stpy/helpers/ColorDB.py | 394 +- stpy/helpers/abitrary_sampling.py | 362 +- stpy/helpers/coreset_helper.py | 36 +- stpy/helpers/ellipsoid_algorithms.py | 816 +-- stpy/helpers/haarfisz_transform.py | 157 +- stpy/helpers/helper.py | 857 ++-- stpy/helpers/plot_helper.py | 225 +- stpy/helpers/plotting_helper.py | 35 +- stpy/helpers/posterior_sampling.py | 416 +- stpy/helpers/quadrature_helper.py | 495 +- stpy/helpers/scores.py | 5 +- stpy/helpers/transformations.py | 82 +- stpy/helpers/wavelets.py | 30 +- stpy/kernel_functions/additive_decorator.py | 5 +- stpy/kernel_functions/ard_kernel.py | 148 +- stpy/kernel_functions/covar_kernel.py | 27 +- stpy/kernel_functions/custom_map_kernel.py | 17 +- stpy/kernel_functions/gibbs_custom_kernel.py | 29 +- stpy/kernel_functions/gibbs_kernel.py | 31 +- stpy/kernel_functions/kernel_params.py | 17 +- stpy/kernel_functions/laplace_kernel.py | 15 +- stpy/kernel_functions/linear_kernel.py | 25 +- .../squared_exponential_kernel.py | 68 +- stpy/kernel_functions/step_kernel.py | 23 +- stpy/kernels.py | 2274 ++++----- stpy/legacy/integral_kernels.py | 1172 ++--- stpy/legacy/integral_kernels2.py | 196 +- stpy/optim/cost_functions.py | 107 +- stpy/optim/custom_optimizers.py | 577 +-- stpy/optim/frank_wolfe.py | 102 +- stpy/optim/hyper_parameter_opt.py | 268 +- stpy/optim/manifold_optimization.py | 85 +- .../binomial/binomial_process.py | 64 +- .../binomial/binomial_process_estimator.py | 1106 +++-- .../link_fun_rate_estimator.py | 1009 ++-- .../log_link_rate_estimator.py | 468 +- stpy/point_processes/loglinear_estimator.py | 360 +- .../point_processes/mbr_positive_estimator.py | 769 +-- stpy/point_processes/poisson.py | 337 +- .../poisson/link_fun_rate_estimator.py | 1010 ++-- .../poisson/loglinear_estimator.py | 352 +- .../poisson/mbr_positive_estimator.py | 767 +-- stpy/point_processes/poisson/poisson.py | 328 +- .../point_processes/poisson_rate_estimator.py | 4407 ++++++++++------- .../positive_basis_estimator.py | 241 +- stpy/point_processes/rate_estimator.py | 416 +- .../point_processes/seasonal_point_process.py | 169 +- stpy/probability/bernoulli_likelihood.py | 128 +- stpy/probability/gaussian_likelihood.py | 176 +- stpy/probability/huber_likelihood.py | 25 +- stpy/probability/laplace_likelihood.py | 115 +- stpy/probability/likelihood.py | 67 +- stpy/probability/noise_models.py | 643 +-- stpy/probability/poisson_likelihood.py | 107 +- stpy/probability/robust_likelihood.py | 66 +- stpy/probability/weibul_likelihood.py | 55 +- stpy/random_process.py | 865 ++-- stpy/regularization/constraints.py | 12 +- stpy/regularization/regularizer.py | 87 +- stpy/regularization/sdp_constraint.py | 20 +- stpy/regularization/simplex_regularizer.py | 45 +- stpy/sampling/hmc.py | 12 +- stpy/sampling/langevin.py | 43 +- stpy/sampling/proximal_langevin.py | 30 +- stpy/sampling/sampling_helper.py | 88 +- 
stpy/test_functions/benchmarks.py | 943 ++-- stpy/test_functions/neural_net.py | 384 +- .../parallel_coordinates_plot.py | 104 +- stpy/test_functions/protein_benchmark.py | 872 ++-- stpy/test_functions/swissfel_simulator.py | 233 +- stpy/test_functions/test_functions.py | 1411 +++--- tests/SRI_test.py | 170 +- tests/clenshaw_curtis_test.py | 63 +- tests/constrained_mean.py | 26 +- .../eigenvector_constraint.py | 82 +- .../psd_minimization/psd_minimization.py | 118 +- .../domain_non_stationarity.py | 114 +- .../test_estimators/group l_q_estimator.py | 50 +- .../group_l_q_estimator_budget.py | 49 +- .../test_estimators/l_q_estimator.py | 77 +- .../test_estimators/qff_nonstationary.py | 131 +- .../test_regularized_dictionary_l2.py | 110 +- tests/continous_processes/test_huber_loss.py | 84 +- .../test_marginalized_pytorch_minimize.py | 24 +- tests/continous_processes/test_svr_loss.py | 141 +- .../test_unif_marginalized.py | 60 +- tests/convergence_test.py | 41 +- tests/cvxopt_integer_test.py | 167 +- tests/cvxpy_integer_test.py | 161 +- tests/dpps_tests.py | 6 +- tests/embedding/faber_schauder_embedding.py | 16 +- tests/fourier-features-multidimensional.py | 34 +- tests/gibbs_kernel.py | 60 +- tests/gradient_confidence_test.py | 34 +- tests/gradients_test.py | 85 +- tests/hessian-estimation-test.py | 50 +- tests/interval_groups_test.py | 19 +- tests/kernelized-features-test.py | 38 +- tests/kernels/ard_matern_kernel_test.py | 16 +- tests/marginalized_likelihood_test.py | 194 +- tests/orthogonal_map_test.py | 58 +- tests/regularization_basis.py | 118 +- tests/spike-basis-general.py | 31 +- tests/test-absolute-deviation.py | 38 +- tests/test-positive-basis.py | 16 +- tests/test_functions/felsimulator_test.py | 6 +- tests/triangle-integration-test.py | 77 +- 147 files changed, 26725 insertions(+), 22219 deletions(-) diff --git a/setup.py b/setup.py index 060143a..3b39c8b 100755 --- a/setup.py +++ b/setup.py @@ -1,28 +1,30 @@ from setuptools import setup packages = [ - 'numpy', - 'scipy', - 'matplotlib', - 'scikit-learn', - 'tensorflow', - 'cvxpy', - 'torch', - 'pymanopt', - 'pandas', - 'mosek', - 'quadprog', - 'cvxpylayers', - 'autograd_minimize' + "numpy", + "scipy", + "matplotlib", + "scikit-learn", + "tensorflow", + "cvxpy", + "torch", + "pymanopt", + "pandas", + "mosek", + "quadprog", + "cvxpylayers", + "autograd_minimize", ] # -setup(name='stpy', - version='0.0.2', - description='Stochastic Process Library for Python', - url='', - author='Mojmir Mutny', - author_email='mojmir.mutny@inf.ethz.ch', - license='custom ', - packages=['stpy'], - zip_safe=False, - install_requires=packages) +setup( + name="stpy", + version="0.0.2", + description="Stochastic Process Library for Python", + url="", + author="Mojmir Mutny", + author_email="mojmir.mutny@inf.ethz.ch", + license="custom ", + packages=["stpy"], + zip_safe=False, + install_requires=packages, +) diff --git a/stpy/approx_inference/expected-propagation.py b/stpy/approx_inference/expected-propagation.py index 44b6b0e..fbb1132 100644 --- a/stpy/approx_inference/expected-propagation.py +++ b/stpy/approx_inference/expected-propagation.py @@ -3,63 +3,67 @@ from scipy.stats import multivariate_normal -class ExpectedPropagationQuadratic(): +class ExpectedPropagationQuadratic: - def __init__(self, mu_prior, Sigma_prior, likelihood_single, data): + def __init__(self, mu_prior, Sigma_prior, likelihood_single, data): - # takes two arguments param, theta - self.likelihood_single = likelihood_single + # takes two arguments param, theta + 
self.likelihood_single = likelihood_single - # prior information - self.mu_prior = mu_prior - self.Sigma_prior = Sigma_prior + # prior information + self.mu_prior = mu_prior + self.Sigma_prior = Sigma_prior - self.d = mu_prior.size()[1] + self.d = mu_prior.size()[1] - self.n = len(self.data) - self.data = data + self.n = len(self.data) + self.data = data - self.approx = [] - for i in range(self.n): - mu = torch.zeros(size=(1, self.d)).double() - Sigma = torch.eye(size=(self.d, self.d)).double() - self.approx.append((mu, Sigma)) + self.approx = [] + for i in range(self.n): + mu = torch.zeros(size=(1, self.d)).double() + Sigma = torch.eye(size=(self.d, self.d)).double() + self.approx.append((mu, Sigma)) - def marginalized_version(self, j): - mu = torch.zeros(size=(1, self.d)).double() - Sigma = torch.zeros(size=(self.d, self.d)).double() + def marginalized_version(self, j): + mu = torch.zeros(size=(1, self.d)).double() + Sigma = torch.zeros(size=(self.d, self.d)).double() - for i in range(self.n): - if i != j: - Sigma_elem = self.approx[j][0] - mu_elem = self.approx[j][1] - Sigma_elem_inv = torch.inverse(Sigma_elem) - mu += Sigma_elem_inv @ mu_elem - Sigma += Sigma_elem_inv - Sigma = torch.inverse(Sigma) - mu = Sigma @ mu - return (mu, Sigma) + for i in range(self.n): + if i != j: + Sigma_elem = self.approx[j][0] + mu_elem = self.approx[j][1] + Sigma_elem_inv = torch.inverse(Sigma_elem) + mu += Sigma_elem_inv @ mu_elem + Sigma += Sigma_elem_inv + Sigma = torch.inverse(Sigma) + mu = Sigma @ mu + return (mu, Sigma) - def match_likelihood(self, j): - mu, Sigma = self.marginalized_version(j) - lik = lambda x: self.likelihood_single(torch.from_numpy(x), self.data[j]).numpy() - prob = lambda x: multivariate_normal.pdf(x, mean=mu.view(-1).reshape.numpy(), cov=Sigma.numpy()) - first_moment = integrate.quad(lambda x: x * lik(x) * prob(x), 0.0, 10e10) - second_moment = integrate.quad(lambda x: x * x * lik(x) * prob(x), 0.0, 10e10) + def match_likelihood(self, j): + mu, Sigma = self.marginalized_version(j) + lik = lambda x: self.likelihood_single( + torch.from_numpy(x), self.data[j] + ).numpy() + prob = lambda x: multivariate_normal.pdf( + x, mean=mu.view(-1).reshape.numpy(), cov=Sigma.numpy() + ) + first_moment = integrate.quad(lambda x: x * lik(x) * prob(x), 0.0, 10e10) + second_moment = integrate.quad(lambda x: x * x * lik(x) * prob(x), 0.0, 10e10) - self.approx[j][0] = first_moment - self.approx[j][1] = second_moment + self.approx[j][0] = first_moment + self.approx[j][1] = second_moment - return (first_moment, second_moment - first_moment ** 2) + return (first_moment, second_moment - first_moment**2) - def finalize(self): - pass + def finalize(self): + pass - def fit_gp(self, iterations='auto'): - if iterations == 'auto': - T = 100 - for i in range(T): - for j in range(self.n): - self.match_likelihood(j) - mu, Sigma = self.finalize() - return mu, Sigma + def fit_gp(self, iterations="auto"): + if iterations == "auto": + T = 100 + for i in range(T): + for j in range(self.n): + self.match_likelihood(j) + mu, Sigma = self.finalize() + return mu, Sigma diff --git a/stpy/approx_inference/hmc.py b/stpy/approx_inference/hmc.py index 879fd17..1e6ce13 100644 --- a/stpy/approx_inference/hmc.py +++ b/stpy/approx_inference/hmc.py @@ -1,5 +1,7 @@ -params_hmc = hamiltorch.sample(log_prob_func=log_prob_func, - params_init=params_init, - num_samples=num_samples, - step_size=step_size, - num_steps_per_sample=num_steps_per_sample) +params_hmc = hamiltorch.sample( + log_prob_func=log_prob_func, + 
params_init=params_init, + num_samples=num_samples, + step_size=step_size, + num_steps_per_sample=num_steps_per_sample, +) diff --git a/stpy/approx_inference/langevin.py b/stpy/approx_inference/langevin.py index 3ed2dc2..21e430a 100644 --- a/stpy/approx_inference/langevin.py +++ b/stpy/approx_inference/langevin.py @@ -3,25 +3,29 @@ import torch -class LangevinSampler(): +class LangevinSampler: - def __init__(self, verbose=False): - self.verbose = verbose - pass + def __init__(self, verbose=False): + self.verbose = verbose + pass - def calculate(self, HessianF, theta0): - W = HessianF(theta0) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - return L + def calculate(self, HessianF, theta0): + W = HessianF(theta0) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + return L - def sample(self, F, nablaF, HessianF, theta0, steps=100): - L = self.calculate(HessianF, theta0) - eta = 0.5 / (L + 1) - m = theta0.size()[0] - theta = theta0 - for k in range(steps): - w = torch.randn(size=(m, 1)).double() - theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w - if self.verbose == True: - print("Iter:", k, theta.T) - return theta + def sample(self, F, nablaF, HessianF, theta0, steps=100): + L = self.calculate(HessianF, theta0) + eta = 0.5 / (L + 1) + m = theta0.size()[0] + theta = theta0 + for k in range(steps): + w = torch.randn(size=(m, 1)).double() + theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w + if self.verbose == True: + print("Iter:", k, theta.T) + return theta diff --git a/stpy/approx_inference/proximal_langevin.py b/stpy/approx_inference/proximal_langevin.py index f1da7b6..fec409d 100644 --- a/stpy/approx_inference/proximal_langevin.py +++ b/stpy/approx_inference/proximal_langevin.py @@ -3,18 +3,23 @@ def ProximalLangevin(LangevinSampler): - def sample(self, F, nablaF, HessianF, theta0, prox, steps=100): - L = self.calculate(HessianF, theta0) - eta = 0.5 / (L + 1) - m = theta0.size()[0] - theta = theta0 - for k in range(steps): - w = torch.randn(size=(m, 1)).double() - theta = (1 - eta) * theta - eta * nablaF(theta) + eta * prox(theta) + np.sqrt(2 * eta) * w - if self.verbose == True: - print("Iter:", k, theta.T) - return prox(theta) + def sample(self, F, nablaF, HessianF, theta0, prox, steps=100): + L = self.calculate(HessianF, theta0) + eta = 0.5 / (L + 1) + m = theta0.size()[0] + theta = theta0 + for k in range(steps): + w = torch.randn(size=(m, 1)).double() + theta = ( + (1 - eta) * theta + - eta * nablaF(theta) + + eta * prox(theta) + + np.sqrt(2 * eta) * w + ) + if self.verbose == True: + print("Iter:", k, theta.T) + return prox(theta) def MirrorLangevin(LangvinSampler): - pass + pass diff --git a/stpy/approx_inference/sampling_helper.py b/stpy/approx_inference/sampling_helper.py index 8976e70..960a10e 100644 --- a/stpy/approx_inference/sampling_helper.py +++ b/stpy/approx_inference/sampling_helper.py @@ -4,53 +4,53 @@ def get_increment(eta, steps, f, w0, path=False): - """ + """ - :param eta: terminal time - :param steps: number of steps - :param f: the operator - :param w0: initial point - :return: - """ + :param eta: terminal time + :param steps: number of steps + :param f: the operator + :param w0: initial point + :return: + """ - tau = eta / steps - w = w0 - sequence = [] + tau = eta / steps + w = w0 + sequence = [] - for i in range(steps): + for i in range(steps): - n = torch.randn(size=w0.size()).double() - w = w + np.sqrt(2 * tau) * f(w, n) 
- if path: - sequence.append(w) + n = torch.randn(size=w0.size()).double() + w = w + np.sqrt(2 * tau) * f(w, n) + if path: + sequence.append(w) - if path: - return sequence - else: - return w + if path: + return sequence + else: + return w if __name__ == "__main__": - f = lambda w: torch.diag(1. / torch.abs(w.view(-1))) - d = 1 - w0 = torch.zeros(size=(d, 1)).double() + 2 - step = 100 - path = get_increment(2, step, f, w0, path=True) - # plt.plot(path) - - i = 0 - colors = ['k', 'r', 'b', 'orange', 'brown', 'purple'] - for steps in [5, 10, 20, 100, 200, 500]: - - repeats = 100 - ws = [] - for _ in range(repeats): - path = get_increment(2, steps, f, w0, path=True) - xtest = torch.linspace(0, 2, steps) - plt.plot(xtest, path, color=colors[i]) - i = i + 1 - # plt.hist(np.array(ws), label = str(step)) - - plt.legend() - plt.show() + f = lambda w: torch.diag(1.0 / torch.abs(w.view(-1))) + d = 1 + w0 = torch.zeros(size=(d, 1)).double() + 2 + step = 100 + path = get_increment(2, step, f, w0, path=True) + # plt.plot(path) + + i = 0 + colors = ["k", "r", "b", "orange", "brown", "purple"] + for steps in [5, 10, 20, 100, 200, 500]: + + repeats = 100 + ws = [] + for _ in range(repeats): + path = get_increment(2, steps, f, w0, path=True) + xtest = torch.linspace(0, 2, steps) + plt.plot(xtest, path, color=colors[i]) + i = i + 1 + # plt.hist(np.array(ws), label = str(step)) + + plt.legend() + plt.show() diff --git a/stpy/approx_inference/variational_mf.py b/stpy/approx_inference/variational_mf.py index 5fff78d..9eae29c 100644 --- a/stpy/approx_inference/variational_mf.py +++ b/stpy/approx_inference/variational_mf.py @@ -16,9 +16,10 @@ You should have received a copy of the GNU General Public License along with SGCP_Inference. If not, see . """ -__author__ = 'Christian Donner' -__email__ = 'christian.donner(at)bccn-berlin.de' -__license__ = 'gpl-3.0' + +__author__ = "Christian Donner" +__email__ = "christian.donner(at)bccn-berlin.de" +__license__ = "gpl-3.0" import time @@ -28,673 +29,752 @@ from scipy.special import digamma, gammaln -class VMF_SGCP(): - - def __init__(self, S_borders, X, cov_params, num_inducing_points, - lmbda_star=None, conv_crit=1e-4, - num_integration_points=1000, output=False, - update_hyperparams=True, - noise=1e-4, epsilon=5e-2): - """ Class initialisation for variational mean field inference for - sigmoidal Gaussian Cox process. - - :param S_borders: numpy.ndarray [D x 2] - Limits of the region of interest. - :param X: numpy.ndarray [num_points x D] - Positions of the observations. - :param cov_params: numpy.ndarray [D + 1] - Hyperparameters of the covariance functions. First is amplitude, - and the others the length scale for each dimension. - :param num_inducing_points: int - Number of inducing points (Should be a power of dimensions) - :param lmbda_star: float - Maximal intensity. If None it is initialized as twice the mean - observation rate for a homogeneous process. (Default=None) - :param conv_crit: - Convergence criterion, when algorithm should stop. (Default=1e-4) - :param num_integration_points: int - Number of points that should be used for Monte Carlo integration. - (Default = 1000) - :param output: bool - Prints info after each optimisation step. (Default=False) - :param update_hyperparams: bool - Whether the hyperparameters are updated (by Adam) or not. ( - Default=False) - :param noise: float - Noise added to the diagonal of the covariance matrix (should be - small). (Default=1e-4) - param epsilon: float - Step size for Adam in the hyperparameter update. 
(Default=5e-2) - """ - - self.S_borders = S_borders - self.S = S_borders[:, 1] - S_borders[:, 0] - self.R = numpy.prod(self.S) - self.D = S_borders.shape[0] - self.noise = noise - self.cov_params = cov_params - self.num_integration_points = num_integration_points - self.num_inducing_points = num_inducing_points # must be power of D - self.X = X - - self.place_inducing_points() - self.mu_g_s = numpy.zeros(self.induced_points.shape[0]) - self.Sigma_g_s = numpy.identity(self.induced_points.shape[0]) - self.Sigma_g_s_inv = numpy.identity(self.induced_points.shape[0]) - self.Ks = self.cov_func(self.induced_points, self.induced_points) - L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye( - self.Ks.shape[0])) - L_inv = solve_triangular(L, numpy.eye(L.shape[0]), lower=True, - check_finite=False) - self.Ks_inv = L_inv.T.dot(L_inv) - self.logdet_Ks = 2. * numpy.sum(numpy.log(L.diagonal())) - - self.place_integration_points() - self.ks_X = self.cov_func(self.induced_points, self.X) - self.LB_list = [] - self.times = [] - - self.kappa_X = self.Ks_inv.dot(self.ks_X) - self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) - self.mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, 'X') - self.mu_g2_X = var_g_X + self.mu_g_X ** 2 - self.mu_g_int_points, var_g_int_points = self.predictive_posterior_GP( - self.integration_points, 'int_points') - self.mu_g2_int_points = var_g_int_points + self.mu_g_int_points ** 2 - self.epsilon = epsilon - self.alpha0 = 4. - self.beta0 = 2. / (float(self.X.shape[0] / self.R)) - if lmbda_star is None: - self.lmbda_star_q1 = self.alpha0 / self.beta0 - self.log_lmbda_star_q1 = digamma(self.alpha0) - numpy.log(self.beta0) - else: - self.lmbda_star_q1 = lmbda_star - self.log_lmbda_star_q1 = numpy.log(lmbda_star) - self.alpha_q1 = self.alpha0 - self.beta_q1 = self.beta0 - self.convergence = numpy.inf - self.conv_crit = conv_crit - self.num_iterations = 0 - self.output = output - self.update_hyperparams = update_hyperparams - - # ADAM parameters - self.beta1_adam = .9 - self.beta2_adam = .99 - self.epsilon_adam = 1e-5 - self.m_hyper_adam = numpy.zeros(self.D + 1) - self.v_hyper_adam = numpy.zeros(self.D + 1) - self.m_bm_adam = numpy.zeros(self.D) - self.v_bm_adam = numpy.zeros(self.D) - - def place_inducing_points(self): - """ Places the induced points for sparse GP. - """ - - num_per_dim = int(numpy.ceil(self.num_inducing_points ** (1. / self.D))) - induced_grid = numpy.empty([num_per_dim, self.D]) - for di in range(self.D): - dist_between_points = self.S[di] / num_per_dim - induced_grid[:, di] = numpy.arange(.5 * dist_between_points, - self.S[di], - dist_between_points) - - self.induced_points = numpy.meshgrid(*induced_grid.T.tolist()) - self.induced_points = numpy.array(self.induced_points).reshape([ - self.D, -1]).T - - def run(self): - """ Fitting function for the variational mean-field algorithm. 
- """ - - # Initialisation - self.times.append(time.perf_counter()) - self.calculate_PG_expectations() - self.calculate_posterior_intensity() - converged = False - while not converged: - self.num_iterations += 1 - # Update second factor q2 - self.calculate_postrior_GP() - self.update_predictive_posterior() - self.update_max_intensity() - # Update first factor q1 - self.calculate_PG_expectations() - self.calculate_posterior_intensity() - # Update hyperparameters - if self.update_hyperparams: - self.update_hyperparameters() - # Calculate lower bound - self.LB_list.append(self.calculate_lower_bound()) - # Check for convergence - if self.num_iterations > 1: - self.convergence = numpy.absolute(self.LB_list[-1] - - self.LB_list[ - -2]) / numpy.amax([numpy.abs(self.LB_list[-1]), - numpy.abs(self.LB_list[-2]), 1]) - converged = self.convergence < self.conv_crit - self.times.append(time.perf_counter()) - if self.output: - self.print_info() - - def print_info(self): - """ Functions to print info, while iteratively updating posterior. - """ - print((' +-----------------+ ' + - '\n | Iteration %4d |' + - '\n | Conv. = %.4f |' + - '\n +-----------------+') % (self.num_iterations, - self.convergence_inner)) - - def place_integration_points(self): - """ Places the integration points for Monte Carlo integration and - updates all related kernels. - """ - - self.integration_points = numpy.random.rand( - self.num_integration_points, self.D) - self.integration_points *= self.S[numpy.newaxis] - self.ks_int_points = self.cov_func(self.induced_points, - self.integration_points) - self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) - - def calculate_posterior_intensity(self): - """ The rate of the posterior process is updated. - """ - - self.lmbda_q2 = .5 * numpy.exp( - -.5 * self.mu_g_int_points + self.log_lmbda_star_q1) / \ - numpy.cosh(.5 * self.c_int_points) - - def calculate_PG_expectations(self): - """ The Polya-Gamma posterior is updated. - """ - - self.c_X = numpy.sqrt(self.mu_g2_X) - self.mu_omega_X = .5 / self.c_X * numpy.tanh( - .5 * self.c_X) - self.c_int_points = numpy.sqrt(self.mu_g2_int_points) - self.mu_omega_int_points = .5 / self.c_int_points \ - * numpy.tanh(.5 * self.c_int_points) - - def calculate_predictive_posterior_intensity(self, X_prime): - """ Calculates the posterior intensity at X_prime for the latent - Poisson process. (Not the intensity of the observed Poisson process!!!) - - :param X_prime: numpy.ndarray [num_points x D] - Position of points, that should be evaluated. - - :return: numpy.ndarray [num_points] - Posterior intensity. - """ - mu_g, var_g = self.predictive_posterior_GP(X_prime) - mu_g = mu_g - mu_g2 = var_g + mu_g ** 2 - c = numpy.sqrt(mu_g2) - pred_lmbda_q2 = .5 * numpy.exp( - -.5 * mu_g + self.log_lmbda_star_q1) / \ - numpy.cosh(.5 * c) - return pred_lmbda_q2 - - def calculate_postrior_GP(self): - """ The new GP at the inducing points is calculated. 
- """ - - A_int_points = self.lmbda_q2 * self.mu_omega_int_points - A_X = self.mu_omega_X - kAk = self.kappa_X.dot(A_X[:, numpy.newaxis] * self.kappa_X.T) + \ - self.kappa_int_points.dot(A_int_points[:, numpy.newaxis] * - self.kappa_int_points.T) \ - / self.num_integration_points * self.R - self.Sigma_g_s_inv = kAk + self.Ks_inv - L_inv = numpy.linalg.cholesky(self.Sigma_g_s_inv + self.noise * - numpy.eye( - self.Sigma_g_s_inv.shape[0])) - L = solve_triangular(L_inv, numpy.eye(L_inv.shape[0]), lower=True, - check_finite=False) - self.Sigma_g_s = L.T.dot(L) - self.logdet_Sigma_g_s = 2 * numpy.sum(numpy.log(L.diagonal())) - b_int_points = -.5 * self.lmbda_q2 - b_X = .5 * numpy.ones(self.X.shape[0]) - kb = self.ks_X.dot(b_X) + self.ks_int_points.dot(b_int_points) / \ - self.num_integration_points * self.R - self.mu_g_s = self.Sigma_g_s.dot(kb.dot(self.Ks_inv)) - - def predictive_posterior_GP(self, x_prime, points=None): - """ Computes the predictive posterior for given points - - :param x_prime: numpy.ndarray [num_points x D] - Points, which should be predicted for. - :param points: str - If 'int_points' or 'X' posterior for integration points or - observation points is calculated, respectively. (Default=None) - :returns: - numpy.ndarray [num_points]: mean of predictive posterior - numpy.ndarray [num_points]: variance of predictive posterior - """ - if points is None: - ks_x_prime = self.cov_func(self.induced_points, x_prime) - kappa = self.Ks_inv.dot(ks_x_prime) - elif points is 'int_points': - ks_x_prime = self.ks_int_points - kappa = self.kappa_int_points - elif points is 'X': - ks_x_prime = self.ks_X - kappa = self.kappa_X - - mu_g_x_prime = kappa.T.dot(self.mu_g_s) - K_xx = self.cov_func(x_prime, x_prime, only_diagonal=True) - var_g_x_prime = K_xx - numpy.sum(kappa * (ks_x_prime - kappa.T.dot( - self.Sigma_g_s).T), axis=0) - return mu_g_x_prime, var_g_x_prime - - def cov_func(self, x, x_prime, only_diagonal=False): - """ Computes the covariance functions between x and x_prime. - - :param x: numpy.ndarray [num_points x D] - Contains coordinates for points of x - :param x_prime: numpy.ndarray [num_points_prime x D] - Contains coordinates for points of x_prime - :param only_diagonal: bool - If true only diagonal is computed (Works only if x and x_prime - are the same, Default=False) - - :return: numpy.ndarray [num_points x num_points_prime] - ([num_points_prime] if only diagonal) - Kernel matrix. - """ - - theta_1, theta_2 = self.cov_params[0], self.cov_params[1] - if only_diagonal: - return theta_1 * numpy.ones(x.shape[0]) - - else: - x_theta2 = x / theta_2 - xprime_theta2 = x_prime / theta_2 - h = numpy.sum(x_theta2 ** 2, axis=1)[:, None] - 2. * numpy.dot( - x_theta2, xprime_theta2.T) + \ - numpy.sum(xprime_theta2 ** 2, axis=1)[None] - return theta_1 * numpy.exp(-.5 * h) - - def calculate_lower_bound(self): - """ Calculates the variational lower bound for current posterior. - - :return: float - Variational lower bound. - """ - - Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s) - f_int_points = .5 * (- self.mu_g_int_points - - self.mu_g2_int_points * self.mu_omega_int_points) - \ - numpy.log(2) - integrand = f_int_points - \ - numpy.log(self.lmbda_q2 * numpy.cosh(.5 * self.c_int_points)) \ - + self.log_lmbda_star_q1 + \ - .5 * self.c_int_points ** 2 * self.mu_omega_int_points + 1. 
- f_X = .5 * (self.mu_g_X - self.mu_g2_X * self.mu_omega_X) - \ - numpy.log(2) - summand = f_X + self.log_lmbda_star_q1 - numpy.log(numpy.cosh( - .5 * self.c_X)) + .5 * self.c_X ** 2 * self.mu_omega_X - - L = integrand.dot(self.lmbda_q2) / self.num_integration_points * self.R - L -= self.lmbda_star_q1 * self.R - L += numpy.sum(summand) - L -= .5 * numpy.trace(self.Ks_inv.dot(Sigma_s_mugmug)) - L -= .5 * self.logdet_Ks - L += .5 * self.logdet_Sigma_g_s + .5 * self.num_inducing_points - L += self.alpha0 * numpy.log(self.beta0) - gammaln(self.alpha0) + \ - (self.alpha0 - 1) * self.log_lmbda_star_q1 - \ - self.beta0 * self.lmbda_star_q1 - L += self.alpha_q1 - numpy.log(self.beta_q1) + gammaln(self.alpha_q1) \ - + (1. - self.alpha_q1) * digamma(self.alpha_q1) - - return L - - def update_max_intensity(self): - """ Updates the posterior for the maximal intensity. - """ - self.alpha_q1 = self.X.shape[0] + numpy.sum( - self.lmbda_q2) / self.num_integration_points * self.R + self.alpha0 - self.beta_q1 = self.beta0 + self.R - self.lmbda_star_q1 = self.alpha_q1 / self.beta_q1 - self.log_lmbda_star_q1 = digamma(self.alpha_q1) - \ - numpy.log(self.beta_q1) - - def update_kernels(self): - """ Updates all kernels (for inducing, observed and integration points). - """ - self.ks_int_points = self.cov_func(self.induced_points, - self.integration_points) - self.ks_X = self.cov_func(self.induced_points, self.X) - self.Ks = self.cov_func(self.induced_points, self.induced_points) - L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye( - self.Ks.shape[0])) - L_inv = solve_triangular(L, numpy.eye(L.shape[0]), lower=True, - check_finite=False) - self.Ks_inv = L_inv.T.dot(L_inv) - self.logdet_Ks = 2. * numpy.sum(numpy.log(L.diagonal())) - self.kappa_X = self.Ks_inv.dot(self.ks_X) - self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) - - def calculate_hyperparam_derivative(self): - """ Calculates the derivative of the hyperparameters. - - :return: numpy.ndarray [D + 1] - Derivatives of hyperparameters. - """ - - theta1, theta2 = self.cov_params[0], numpy.copy( - self.cov_params[1]) - Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s) - dks_X = numpy.empty([self.ks_X.shape[0], self.ks_X.shape[1], - 1 + theta2.shape[0]]) - dks_int_points = numpy.empty( - [self.ks_int_points.shape[0], self.ks_int_points.shape[1], - 1 + theta2.shape[0]]) - dKs = numpy.empty([self.Ks.shape[0], self.Ks.shape[1], - 1 + theta2.shape[0]]) - dKss = numpy.zeros([1 + theta2.shape[0]]) - dKss[0] = 1. 
- - # kernel derivatives wrt theta1 - dks_X[:, :, 0] = self.ks_X / theta1 - dks_int_points[:, :, 0] = self.ks_int_points / theta1 - dKs[:, :, 0] = self.Ks / theta1 - # kernel derivatives wrt theta2 - dx = numpy.subtract(self.induced_points[:, None], - self.X[None]) - dks_X[:, :, 1:] = self.ks_X[:, :, None] * (dx ** 2) / \ - (theta2[None, None] ** 3) - dx = numpy.subtract(self.induced_points[:, None], - self.integration_points[None]) - dks_int_points[:, :, 1:] = self.ks_int_points[:, :, None] * \ - (dx ** 2) / (theta2[None, None] ** 3) - dx = numpy.subtract(self.induced_points[:, None], - self.induced_points[None]) - dKs[:, :, 1:] = self.Ks[:, :, None] * (dx ** 2) / ( - theta2[None, None] ** 3) - dL_dtheta = numpy.empty(1 + len(theta2)) - - for itheta in range(1 + len(theta2)): - dKs_inv = -self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv)) - - dkappa_X = self.Ks_inv.dot(dks_X[:, :, itheta]) + dKs_inv.dot( - self.ks_X) - dkappa_int_points = self.Ks_inv.dot( - dks_int_points[:, :, itheta]) + dKs_inv.dot( - self.ks_int_points) - - dKtilde_X = dKss[itheta] - numpy.sum( - dks_X[:, :, itheta] * self.kappa_X, axis=0) - numpy.sum( - self.ks_X * dkappa_X, axis=0) - dKtilde_int_points = dKss[itheta] - numpy.sum( - dks_int_points[:, :, itheta] * self.kappa_int_points, - axis=0) - numpy.sum(self.ks_int_points * dkappa_int_points, - axis=0) - - dg1_X = self.mu_g_s.dot(dkappa_X) - dg1_int_points = self.mu_g_s.dot(dkappa_int_points) - - dg2_X = (dKtilde_X + 2. * numpy.sum( - self.kappa_X * Sigma_s_mugmug.dot(dkappa_X), - axis=0)) * self.mu_omega_X - dg2_int_points = (dKtilde_int_points + 2. * numpy.sum( - self.kappa_int_points * Sigma_s_mugmug.dot(dkappa_int_points), - axis=0)) * self.mu_omega_int_points - - dL_dtheta[itheta] = .5 * (numpy.sum(dg1_X) - numpy.sum(dg2_X)) - dL_dtheta[itheta] += .5 * numpy.dot( - -dg1_int_points - dg2_int_points, - self.lmbda_q2) / self.num_integration_points * self.R - dL_dtheta[itheta] -= .5 * numpy.trace(self.Ks_inv.dot( - dKs[:, :, itheta])) - dL_dtheta[itheta] += .5 * numpy.trace( - self.Ks_inv.dot(dKs[:, :, itheta].dot( - self.Ks_inv.dot(Sigma_s_mugmug)))) - - return dL_dtheta - - def update_hyperparameters(self): - """ Updates the hyperparameters with Adam. - """ - dL_dtheta = self.calculate_hyperparam_derivative() - logtheta1, logtheta2 = numpy.log(self.cov_params[0]), \ - numpy.log(self.cov_params[1]) - dL_dlogtheta1 = dL_dtheta[0] * numpy.exp(logtheta1) - dL_dlogtheta2 = dL_dtheta[1:] * numpy.exp(logtheta2) - - self.m_hyper_adam[0] = self.beta1_adam * self.m_hyper_adam[0] + \ - (1. - self.beta1_adam) * dL_dlogtheta1 - self.v_hyper_adam[0] = self.beta2_adam * self.v_hyper_adam[0] + \ - (1. - self.beta2_adam) * dL_dlogtheta1 ** 2 - self.m_hyper_adam[1:] = self.beta1_adam * self.m_hyper_adam[1:] + \ - (1. - self.beta1_adam) * dL_dlogtheta2 - self.v_hyper_adam[1:] = self.beta2_adam * self.v_hyper_adam[1:] + \ - (1. - self.beta2_adam) * dL_dlogtheta2 ** 2 - m_hat = self.m_hyper_adam / (1. - self.beta1_adam) - v_hat = self.v_hyper_adam / (1. 
- self.beta2_adam) - logtheta1 += self.epsilon * m_hat[0] / (numpy.sqrt(v_hat[0]) + - self.epsilon_adam) - logtheta2 += self.epsilon * m_hat[1:] / (numpy.sqrt(v_hat[1:]) + - self.epsilon_adam) - self.cov_params[0] = numpy.exp(logtheta1) - self.cov_params[1] = numpy.exp(logtheta2) - self.update_kernels() - self.update_predictive_posterior() - - def update_predictive_posterior(self, only_int_points=False): - """ Updates the function g (mean & variance) at each point (observed - and points for monte carlo integral) - - :param only_int_points: bool - If True it only updates the integration points. (Default=False) - """ - - if not only_int_points: - mu_g_X, var_g_X = self.predictive_posterior_GP( - self.X, points='X') - self.mu_g_X = mu_g_X - self.mu_g2_X = var_g_X + mu_g_X ** 2 - mu_g_int_points, var_g_int_points = self.predictive_posterior_GP( - self.integration_points, points='int_points') - self.mu_g_int_points = mu_g_int_points - self.mu_g2_int_points = var_g_int_points + mu_g_int_points ** 2 - - def predictive_intensity_function(self, X_eval): - """ Computes the predictive intensity function at X_eval by Gaussian - quadrature. - - :param X_eval: numpy.ndarray [num_points_eval x D] - Points where the intensity function should be evaluated. - - :returns: - numpy.ndarray [num_points]: mean of predictive posterior intensity - numpy.ndarray [num_points]: variance of predictive posterior - intensity - """ - num_preds = X_eval.shape[0] - mu_pred, var_pred = self.predictive_posterior_GP(X_eval) - - mean_lmbda_pred, var_lmbda_pred = numpy.empty(num_preds), \ - numpy.empty(num_preds) - - mean_lmbda_q1 = self.lmbda_star_q1 - var_lmbda_q1 = self.alpha_q1 / (self.beta_q1 ** 2) - mean_lmbda_q1_squared = var_lmbda_q1 + mean_lmbda_q1 ** 2 - - for ipred in range(num_preds): - mu, std = mu_pred[ipred], numpy.sqrt(var_pred[ipred]) - func1 = lambda g_pred: 1. / (1. + numpy.exp(-g_pred)) * \ - numpy.exp(-.5 * (g_pred - mu) ** 2 / std ** 2) / \ - numpy.sqrt(2. * numpy.pi * std ** 2) - a, b = mu - 10. * std, mu + 10. * std - mean_lmbda_pred[ipred] = mean_lmbda_q1 * quadrature(func1, a, b, - maxiter=500)[0] - func2 = lambda g_pred: (1. / (1. + numpy.exp(-g_pred))) ** 2 * \ - numpy.exp( - -.5 * (g_pred - mu) ** 2 / std ** 2) / \ - numpy.sqrt(2. * numpy.pi * std ** 2) - a, b = mu - 10. * std, mu + 10. * std - mean_lmbda_pred_squared = mean_lmbda_q1_squared * \ - quadrature(func2, a, b, maxiter=500)[0] - var_lmbda_pred[ipred] = mean_lmbda_pred_squared - mean_lmbda_pred[ - ipred] ** 2 - - return mean_lmbda_pred, var_lmbda_pred - - def sample_posterior(self, X_test, num_samples=1): - """ Samples log predictive likelihood for test set from posterior. - - :param X_test: [num_X_test x D] - Observations in test set. - :param num_samples: int - How many samples of the intensity function should be drawn from - the posterior. (Default=1e4) - - :return: numpy.ndarray [num_samples] - Returns the array of sampled likelihoods. 
- """ - - num_events = X_test.shape[0] - num_samples = int(num_samples) - X = numpy.concatenate([X_test, self.integration_points]) - K = self.cov_func(X, X) - kx = self.cov_func(X, self.induced_points) - kappa = kx.dot(self.Ks_inv) - Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T)) - mu_post = kappa.dot(self.mu_g_s) - L_post = numpy.linalg.cholesky(Sigma_post + self.noise * numpy.eye( - Sigma_post.shape[0])) - - num_points = X.shape[0] - num_hundreds = int(num_samples) - pred_log_likelihood = numpy.empty([num_samples]) - - samples = [] - # samples hundred instances at a time - for ihundreds in range(num_hundreds): - rand_nums = numpy.random.randn(num_points, 1) - g_sample = mu_post[:, None] + L_post.dot(rand_nums) - lmbda_max_sample = numpy.random.gamma(shape=self.alpha_q1, - scale=1. / self.beta_q1, - size=1) - lmbda_sample = lmbda_max_sample / (1. + numpy.exp(-g_sample)) - samples.append(lmbda_sample) - return samples - - def predictive_log_likelihood(self, X_test, num_samples=1e4): - """ Samples log predictive likelihood for test set from posterior. - - :param X_test: [num_X_test x D] - Observations in test set. - :param num_samples: int - How many samples of the intensity function should be drawn from - the posterior. (Default=1e4) - - :return: numpy.ndarray [num_samples] - Returns the array of sampled likelihoods. - """ - - num_events = X_test.shape[0] - num_samples = int(num_samples) - X = numpy.concatenate([X_test, self.integration_points]) - K = self.cov_func(X, X) - kx = self.cov_func(X, self.induced_points) - kappa = kx.dot(self.Ks_inv) - Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T)) - mu_post = kappa.dot(self.mu_g_s) - L_post = numpy.linalg.cholesky(Sigma_post + self.noise * numpy.eye( - Sigma_post.shape[0])) - - num_points = X.shape[0] - num_hundreds = int(num_samples / 1e2) - pred_log_likelihood = numpy.empty([num_samples]) - - # samples hundred instances at a time - for ihundreds in range(num_hundreds): - rand_nums = numpy.random.randn(num_points, 100) - g_sample = mu_post[:, None] + L_post.dot(rand_nums) - lmbda_max_sample = numpy.random.gamma(shape=self.alpha_q1, - scale=1. / self.beta_q1, - size=100) - lmbda_sample = lmbda_max_sample / (1. + numpy.exp(-g_sample)) - - pred_log_likelihood[ihundreds * 100:(ihundreds + 1) * 100] = \ - numpy.sum(numpy.log(lmbda_sample[:num_events]), axis=0) - pred_log_likelihood[ihundreds * 100:(ihundreds + 1) * 100] -= \ - numpy.mean(lmbda_sample[num_events:], axis=0) * self.R - - return pred_log_likelihood - - def expanded_predictive_log_likelihood(self, X_test): - """ Fast approximation for log predictive test likelihood (Eq. 33 in - paper). - - :param X_test: [num_X_test x D] - Observations in test set. - - :return: float - Approximation of log predictive test likelihood. - """ - self.update_predictive_posterior(only_int_points=True) - N = X_test.shape[0] - ks_x_test = self.cov_func(self.induced_points, X_test) - mu_g_X_test = ks_x_test.T.dot(self.Ks_inv.dot(self.mu_g_s)) - u_mean = -self.lmbda_star_q1 * numpy.mean( - 1. / (1. + numpy.exp(-self.mu_g_int_points))) * self.R - \ - numpy.sum(numpy.log(1. + numpy.exp(-mu_g_X_test))) + \ - N * numpy.log(self.lmbda_star_q1) - - log_pred_likelihood = u_mean - du_dg = numpy.empty(N + self.num_integration_points) - du_dg[:N] = 1. / (1. + numpy.exp(mu_g_X_test)) - du_dg[N:] = - self.lmbda_star_q1 / (1. + numpy.exp( - -self.mu_g_int_points)) * (1. - 1. / (1. 
+ numpy.exp( - -self.mu_g_int_points))) \ - / self.num_integration_points * self.R - du_dg2 = numpy.empty(N + self.num_integration_points) - du_dg2[:N] = - (1. - 1. / (1. + numpy.exp(mu_g_X_test))) / \ - (1. + numpy.exp(mu_g_X_test)) - du_dg2[N:] = - self.lmbda_star_q1 / (1. + numpy.exp( - -self.mu_g_int_points)) * (1. - 1. / (1. + numpy.exp( - -self.mu_g_int_points))) * (1. - 2. / (1. + numpy.exp( - -self.mu_g_int_points))) / self.num_integration_points * self.R - - du_dlambda = - self.R * numpy.mean( - 1. / (1. + numpy.exp(-self.mu_g_int_points))) + N / self.lmbda_star_q1 - du_dlmbda2 = - N / self.lmbda_star_q1 ** 2 - - C = numpy.empty([N + self.num_integration_points, - N + self.num_integration_points]) - inner_matrix = self.Ks_inv.dot( - numpy.identity(self.num_inducing_points) - - self.Sigma_g_s.dot(self.Ks_inv)) - - K_X = self.cov_func(X_test, X_test) + self.noise * numpy.identity( - X_test.shape[0]) - - C[:N, :N] = K_X - ks_x_test.T.dot(inner_matrix.dot( - ks_x_test)) - del K_X - K_int_points = self.cov_func(self.integration_points, - self.integration_points) + \ - self.noise * numpy.identity( - self.integration_points.shape[0]) - - C[N:, N:] = K_int_points - self.ks_int_points.T.dot(inner_matrix.dot( - self.ks_int_points)) - del K_int_points - - K_X_int_points = self.cov_func(self.integration_points, X_test) - C[N:, :N] = K_X_int_points - self.ks_int_points.T.dot(inner_matrix.dot( - ks_x_test)) - del K_X_int_points - - C[:N, N:] = C[N:, :N].T - - log_pred_likelihood_corr = .5 * numpy.trace(C.dot(numpy.diag( - du_dg2) + numpy.outer(du_dg, du_dg))) \ - + .5 * (du_dlmbda2 + du_dlambda ** 2) * self.alpha_q1 / self.beta_q1 ** 2 - log_pred_likelihood += log_pred_likelihood_corr - - return log_pred_likelihood +class VMF_SGCP: + + def __init__( + self, + S_borders, + X, + cov_params, + num_inducing_points, + lmbda_star=None, + conv_crit=1e-4, + num_integration_points=1000, + output=False, + update_hyperparams=True, + noise=1e-4, + epsilon=5e-2, + ): + """Class initialisation for variational mean field inference for + sigmoidal Gaussian Cox process. + + :param S_borders: numpy.ndarray [D x 2] + Limits of the region of interest. + :param X: numpy.ndarray [num_points x D] + Positions of the observations. + :param cov_params: numpy.ndarray [D + 1] + Hyperparameters of the covariance functions. First is amplitude, + and the others the length scale for each dimension. + :param num_inducing_points: int + Number of inducing points (Should be a power of dimensions) + :param lmbda_star: float + Maximal intensity. If None it is initialized as twice the mean + observation rate for a homogeneous process. (Default=None) + :param conv_crit: + Convergence criterion, when algorithm should stop. (Default=1e-4) + :param num_integration_points: int + Number of points that should be used for Monte Carlo integration. + (Default = 1000) + :param output: bool + Prints info after each optimisation step. (Default=False) + :param update_hyperparams: bool + Whether the hyperparameters are updated (by Adam) or not. ( + Default=False) + :param noise: float + Noise added to the diagonal of the covariance matrix (should be + small). (Default=1e-4) + param epsilon: float + Step size for Adam in the hyperparameter update. 
(Default=5e-2) + """ + + self.S_borders = S_borders + self.S = S_borders[:, 1] - S_borders[:, 0] + self.R = numpy.prod(self.S) + self.D = S_borders.shape[0] + self.noise = noise + self.cov_params = cov_params + self.num_integration_points = num_integration_points + self.num_inducing_points = num_inducing_points # must be power of D + self.X = X + + self.place_inducing_points() + self.mu_g_s = numpy.zeros(self.induced_points.shape[0]) + self.Sigma_g_s = numpy.identity(self.induced_points.shape[0]) + self.Sigma_g_s_inv = numpy.identity(self.induced_points.shape[0]) + self.Ks = self.cov_func(self.induced_points, self.induced_points) + L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(self.Ks.shape[0])) + L_inv = solve_triangular( + L, numpy.eye(L.shape[0]), lower=True, check_finite=False + ) + self.Ks_inv = L_inv.T.dot(L_inv) + self.logdet_Ks = 2.0 * numpy.sum(numpy.log(L.diagonal())) + + self.place_integration_points() + self.ks_X = self.cov_func(self.induced_points, self.X) + self.LB_list = [] + self.times = [] + + self.kappa_X = self.Ks_inv.dot(self.ks_X) + self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) + self.mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, "X") + self.mu_g2_X = var_g_X + self.mu_g_X**2 + self.mu_g_int_points, var_g_int_points = self.predictive_posterior_GP( + self.integration_points, "int_points" + ) + self.mu_g2_int_points = var_g_int_points + self.mu_g_int_points**2 + self.epsilon = epsilon + self.alpha0 = 4.0 + self.beta0 = 2.0 / (float(self.X.shape[0] / self.R)) + if lmbda_star is None: + self.lmbda_star_q1 = self.alpha0 / self.beta0 + self.log_lmbda_star_q1 = digamma(self.alpha0) - numpy.log(self.beta0) + else: + self.lmbda_star_q1 = lmbda_star + self.log_lmbda_star_q1 = numpy.log(lmbda_star) + self.alpha_q1 = self.alpha0 + self.beta_q1 = self.beta0 + self.convergence = numpy.inf + self.conv_crit = conv_crit + self.num_iterations = 0 + self.output = output + self.update_hyperparams = update_hyperparams + + # ADAM parameters + self.beta1_adam = 0.9 + self.beta2_adam = 0.99 + self.epsilon_adam = 1e-5 + self.m_hyper_adam = numpy.zeros(self.D + 1) + self.v_hyper_adam = numpy.zeros(self.D + 1) + self.m_bm_adam = numpy.zeros(self.D) + self.v_bm_adam = numpy.zeros(self.D) + + def place_inducing_points(self): + """Places the induced points for sparse GP.""" + + num_per_dim = int(numpy.ceil(self.num_inducing_points ** (1.0 / self.D))) + induced_grid = numpy.empty([num_per_dim, self.D]) + for di in range(self.D): + dist_between_points = self.S[di] / num_per_dim + induced_grid[:, di] = numpy.arange( + 0.5 * dist_between_points, self.S[di], dist_between_points + ) + + self.induced_points = numpy.meshgrid(*induced_grid.T.tolist()) + self.induced_points = numpy.array(self.induced_points).reshape([self.D, -1]).T + + def run(self): + """Fitting function for the variational mean-field algorithm.""" + + # Initialisation + self.times.append(time.perf_counter()) + self.calculate_PG_expectations() + self.calculate_posterior_intensity() + converged = False + while not converged: + self.num_iterations += 1 + # Update second factor q2 + self.calculate_postrior_GP() + self.update_predictive_posterior() + self.update_max_intensity() + # Update first factor q1 + self.calculate_PG_expectations() + self.calculate_posterior_intensity() + # Update hyperparameters + if self.update_hyperparams: + self.update_hyperparameters() + # Calculate lower bound + self.LB_list.append(self.calculate_lower_bound()) + # Check for convergence + if self.num_iterations > 1: + 
self.convergence = numpy.absolute( + self.LB_list[-1] - self.LB_list[-2] + ) / numpy.amax( + [numpy.abs(self.LB_list[-1]), numpy.abs(self.LB_list[-2]), 1] + ) + converged = self.convergence < self.conv_crit + self.times.append(time.perf_counter()) + if self.output: + self.print_info() + + def print_info(self): + """Functions to print info, while iteratively updating posterior.""" + print( + ( + " +-----------------+ " + + "\n | Iteration %4d |" + + "\n | Conv. = %.4f |" + + "\n +-----------------+" + ) + % (self.num_iterations, self.convergence_inner) + ) + + def place_integration_points(self): + """Places the integration points for Monte Carlo integration and + updates all related kernels. + """ + + self.integration_points = numpy.random.rand(self.num_integration_points, self.D) + self.integration_points *= self.S[numpy.newaxis] + self.ks_int_points = self.cov_func(self.induced_points, self.integration_points) + self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) + + def calculate_posterior_intensity(self): + """The rate of the posterior process is updated.""" + + self.lmbda_q2 = ( + 0.5 + * numpy.exp(-0.5 * self.mu_g_int_points + self.log_lmbda_star_q1) + / numpy.cosh(0.5 * self.c_int_points) + ) + + def calculate_PG_expectations(self): + """The Polya-Gamma posterior is updated.""" + + self.c_X = numpy.sqrt(self.mu_g2_X) + self.mu_omega_X = 0.5 / self.c_X * numpy.tanh(0.5 * self.c_X) + self.c_int_points = numpy.sqrt(self.mu_g2_int_points) + self.mu_omega_int_points = ( + 0.5 / self.c_int_points * numpy.tanh(0.5 * self.c_int_points) + ) + + def calculate_predictive_posterior_intensity(self, X_prime): + """Calculates the posterior intensity at X_prime for the latent + Poisson process. (Not the intensity of the observed Poisson process!!!) + + :param X_prime: numpy.ndarray [num_points x D] + Position of points, that should be evaluated. + + :return: numpy.ndarray [num_points] + Posterior intensity. + """ + mu_g, var_g = self.predictive_posterior_GP(X_prime) + mu_g = mu_g + mu_g2 = var_g + mu_g**2 + c = numpy.sqrt(mu_g2) + pred_lmbda_q2 = ( + 0.5 * numpy.exp(-0.5 * mu_g + self.log_lmbda_star_q1) / numpy.cosh(0.5 * c) + ) + return pred_lmbda_q2 + + def calculate_postrior_GP(self): + """The new GP at the inducing points is calculated.""" + + A_int_points = self.lmbda_q2 * self.mu_omega_int_points + A_X = self.mu_omega_X + kAk = ( + self.kappa_X.dot(A_X[:, numpy.newaxis] * self.kappa_X.T) + + self.kappa_int_points.dot( + A_int_points[:, numpy.newaxis] * self.kappa_int_points.T + ) + / self.num_integration_points + * self.R + ) + self.Sigma_g_s_inv = kAk + self.Ks_inv + L_inv = numpy.linalg.cholesky( + self.Sigma_g_s_inv + self.noise * numpy.eye(self.Sigma_g_s_inv.shape[0]) + ) + L = solve_triangular( + L_inv, numpy.eye(L_inv.shape[0]), lower=True, check_finite=False + ) + self.Sigma_g_s = L.T.dot(L) + self.logdet_Sigma_g_s = 2 * numpy.sum(numpy.log(L.diagonal())) + b_int_points = -0.5 * self.lmbda_q2 + b_X = 0.5 * numpy.ones(self.X.shape[0]) + kb = ( + self.ks_X.dot(b_X) + + self.ks_int_points.dot(b_int_points) + / self.num_integration_points + * self.R + ) + self.mu_g_s = self.Sigma_g_s.dot(kb.dot(self.Ks_inv)) + + def predictive_posterior_GP(self, x_prime, points=None): + """Computes the predictive posterior for given points + + :param x_prime: numpy.ndarray [num_points x D] + Points, which should be predicted for. + :param points: str + If 'int_points' or 'X' posterior for integration points or + observation points is calculated, respectively. 
(Default=None) + :returns: + numpy.ndarray [num_points]: mean of predictive posterior + numpy.ndarray [num_points]: variance of predictive posterior + """ + if points is None: + ks_x_prime = self.cov_func(self.induced_points, x_prime) + kappa = self.Ks_inv.dot(ks_x_prime) + elif points is "int_points": + ks_x_prime = self.ks_int_points + kappa = self.kappa_int_points + elif points is "X": + ks_x_prime = self.ks_X + kappa = self.kappa_X + + mu_g_x_prime = kappa.T.dot(self.mu_g_s) + K_xx = self.cov_func(x_prime, x_prime, only_diagonal=True) + var_g_x_prime = K_xx - numpy.sum( + kappa * (ks_x_prime - kappa.T.dot(self.Sigma_g_s).T), axis=0 + ) + return mu_g_x_prime, var_g_x_prime + + def cov_func(self, x, x_prime, only_diagonal=False): + """Computes the covariance functions between x and x_prime. + + :param x: numpy.ndarray [num_points x D] + Contains coordinates for points of x + :param x_prime: numpy.ndarray [num_points_prime x D] + Contains coordinates for points of x_prime + :param only_diagonal: bool + If true only diagonal is computed (Works only if x and x_prime + are the same, Default=False) + + :return: numpy.ndarray [num_points x num_points_prime] + ([num_points_prime] if only diagonal) + Kernel matrix. + """ + + theta_1, theta_2 = self.cov_params[0], self.cov_params[1] + if only_diagonal: + return theta_1 * numpy.ones(x.shape[0]) + + else: + x_theta2 = x / theta_2 + xprime_theta2 = x_prime / theta_2 + h = ( + numpy.sum(x_theta2**2, axis=1)[:, None] + - 2.0 * numpy.dot(x_theta2, xprime_theta2.T) + + numpy.sum(xprime_theta2**2, axis=1)[None] + ) + return theta_1 * numpy.exp(-0.5 * h) + + def calculate_lower_bound(self): + """Calculates the variational lower bound for current posterior. + + :return: float + Variational lower bound. + """ + + Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s) + f_int_points = 0.5 * ( + -self.mu_g_int_points - self.mu_g2_int_points * self.mu_omega_int_points + ) - numpy.log(2) + integrand = ( + f_int_points + - numpy.log(self.lmbda_q2 * numpy.cosh(0.5 * self.c_int_points)) + + self.log_lmbda_star_q1 + + 0.5 * self.c_int_points**2 * self.mu_omega_int_points + + 1.0 + ) + f_X = 0.5 * (self.mu_g_X - self.mu_g2_X * self.mu_omega_X) - numpy.log(2) + summand = ( + f_X + + self.log_lmbda_star_q1 + - numpy.log(numpy.cosh(0.5 * self.c_X)) + + 0.5 * self.c_X**2 * self.mu_omega_X + ) + + L = integrand.dot(self.lmbda_q2) / self.num_integration_points * self.R + L -= self.lmbda_star_q1 * self.R + L += numpy.sum(summand) + L -= 0.5 * numpy.trace(self.Ks_inv.dot(Sigma_s_mugmug)) + L -= 0.5 * self.logdet_Ks + L += 0.5 * self.logdet_Sigma_g_s + 0.5 * self.num_inducing_points + L += ( + self.alpha0 * numpy.log(self.beta0) + - gammaln(self.alpha0) + + (self.alpha0 - 1) * self.log_lmbda_star_q1 + - self.beta0 * self.lmbda_star_q1 + ) + L += ( + self.alpha_q1 + - numpy.log(self.beta_q1) + + gammaln(self.alpha_q1) + + (1.0 - self.alpha_q1) * digamma(self.alpha_q1) + ) + + return L + + def update_max_intensity(self): + """Updates the posterior for the maximal intensity.""" + self.alpha_q1 = ( + self.X.shape[0] + + numpy.sum(self.lmbda_q2) / self.num_integration_points * self.R + + self.alpha0 + ) + self.beta_q1 = self.beta0 + self.R + self.lmbda_star_q1 = self.alpha_q1 / self.beta_q1 + self.log_lmbda_star_q1 = digamma(self.alpha_q1) - numpy.log(self.beta_q1) + + def update_kernels(self): + """Updates all kernels (for inducing, observed and integration points).""" + self.ks_int_points = self.cov_func(self.induced_points, self.integration_points) + self.ks_X 
= self.cov_func(self.induced_points, self.X) + self.Ks = self.cov_func(self.induced_points, self.induced_points) + L = numpy.linalg.cholesky(self.Ks + self.noise * numpy.eye(self.Ks.shape[0])) + L_inv = solve_triangular( + L, numpy.eye(L.shape[0]), lower=True, check_finite=False + ) + self.Ks_inv = L_inv.T.dot(L_inv) + self.logdet_Ks = 2.0 * numpy.sum(numpy.log(L.diagonal())) + self.kappa_X = self.Ks_inv.dot(self.ks_X) + self.kappa_int_points = self.Ks_inv.dot(self.ks_int_points) + + def calculate_hyperparam_derivative(self): + """Calculates the derivative of the hyperparameters. + + :return: numpy.ndarray [D + 1] + Derivatives of hyperparameters. + """ + + theta1, theta2 = self.cov_params[0], numpy.copy(self.cov_params[1]) + Sigma_s_mugmug = self.Sigma_g_s + numpy.outer(self.mu_g_s, self.mu_g_s) + dks_X = numpy.empty( + [self.ks_X.shape[0], self.ks_X.shape[1], 1 + theta2.shape[0]] + ) + dks_int_points = numpy.empty( + [ + self.ks_int_points.shape[0], + self.ks_int_points.shape[1], + 1 + theta2.shape[0], + ] + ) + dKs = numpy.empty([self.Ks.shape[0], self.Ks.shape[1], 1 + theta2.shape[0]]) + dKss = numpy.zeros([1 + theta2.shape[0]]) + dKss[0] = 1.0 + + # kernel derivatives wrt theta1 + dks_X[:, :, 0] = self.ks_X / theta1 + dks_int_points[:, :, 0] = self.ks_int_points / theta1 + dKs[:, :, 0] = self.Ks / theta1 + # kernel derivatives wrt theta2 + dx = numpy.subtract(self.induced_points[:, None], self.X[None]) + dks_X[:, :, 1:] = self.ks_X[:, :, None] * (dx**2) / (theta2[None, None] ** 3) + dx = numpy.subtract(self.induced_points[:, None], self.integration_points[None]) + dks_int_points[:, :, 1:] = ( + self.ks_int_points[:, :, None] * (dx**2) / (theta2[None, None] ** 3) + ) + dx = numpy.subtract(self.induced_points[:, None], self.induced_points[None]) + dKs[:, :, 1:] = self.Ks[:, :, None] * (dx**2) / (theta2[None, None] ** 3) + dL_dtheta = numpy.empty(1 + len(theta2)) + + for itheta in range(1 + len(theta2)): + dKs_inv = -self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv)) + + dkappa_X = self.Ks_inv.dot(dks_X[:, :, itheta]) + dKs_inv.dot(self.ks_X) + dkappa_int_points = self.Ks_inv.dot( + dks_int_points[:, :, itheta] + ) + dKs_inv.dot(self.ks_int_points) + + dKtilde_X = ( + dKss[itheta] + - numpy.sum(dks_X[:, :, itheta] * self.kappa_X, axis=0) + - numpy.sum(self.ks_X * dkappa_X, axis=0) + ) + dKtilde_int_points = ( + dKss[itheta] + - numpy.sum( + dks_int_points[:, :, itheta] * self.kappa_int_points, axis=0 + ) + - numpy.sum(self.ks_int_points * dkappa_int_points, axis=0) + ) + + dg1_X = self.mu_g_s.dot(dkappa_X) + dg1_int_points = self.mu_g_s.dot(dkappa_int_points) + + dg2_X = ( + dKtilde_X + + 2.0 * numpy.sum(self.kappa_X * Sigma_s_mugmug.dot(dkappa_X), axis=0) + ) * self.mu_omega_X + dg2_int_points = ( + dKtilde_int_points + + 2.0 + * numpy.sum( + self.kappa_int_points * Sigma_s_mugmug.dot(dkappa_int_points), + axis=0, + ) + ) * self.mu_omega_int_points + + dL_dtheta[itheta] = 0.5 * (numpy.sum(dg1_X) - numpy.sum(dg2_X)) + dL_dtheta[itheta] += ( + 0.5 + * numpy.dot(-dg1_int_points - dg2_int_points, self.lmbda_q2) + / self.num_integration_points + * self.R + ) + dL_dtheta[itheta] -= 0.5 * numpy.trace(self.Ks_inv.dot(dKs[:, :, itheta])) + dL_dtheta[itheta] += 0.5 * numpy.trace( + self.Ks_inv.dot(dKs[:, :, itheta].dot(self.Ks_inv.dot(Sigma_s_mugmug))) + ) + + return dL_dtheta + + def update_hyperparameters(self): + """Updates the hyperparameters with Adam.""" + dL_dtheta = self.calculate_hyperparam_derivative() + logtheta1, logtheta2 = numpy.log(self.cov_params[0]), numpy.log( + 
self.cov_params[1] + ) + dL_dlogtheta1 = dL_dtheta[0] * numpy.exp(logtheta1) + dL_dlogtheta2 = dL_dtheta[1:] * numpy.exp(logtheta2) + + self.m_hyper_adam[0] = ( + self.beta1_adam * self.m_hyper_adam[0] + + (1.0 - self.beta1_adam) * dL_dlogtheta1 + ) + self.v_hyper_adam[0] = ( + self.beta2_adam * self.v_hyper_adam[0] + + (1.0 - self.beta2_adam) * dL_dlogtheta1**2 + ) + self.m_hyper_adam[1:] = ( + self.beta1_adam * self.m_hyper_adam[1:] + + (1.0 - self.beta1_adam) * dL_dlogtheta2 + ) + self.v_hyper_adam[1:] = ( + self.beta2_adam * self.v_hyper_adam[1:] + + (1.0 - self.beta2_adam) * dL_dlogtheta2**2 + ) + m_hat = self.m_hyper_adam / (1.0 - self.beta1_adam) + v_hat = self.v_hyper_adam / (1.0 - self.beta2_adam) + logtheta1 += ( + self.epsilon * m_hat[0] / (numpy.sqrt(v_hat[0]) + self.epsilon_adam) + ) + logtheta2 += ( + self.epsilon * m_hat[1:] / (numpy.sqrt(v_hat[1:]) + self.epsilon_adam) + ) + self.cov_params[0] = numpy.exp(logtheta1) + self.cov_params[1] = numpy.exp(logtheta2) + self.update_kernels() + self.update_predictive_posterior() + + def update_predictive_posterior(self, only_int_points=False): + """Updates the function g (mean & variance) at each point (observed + and points for monte carlo integral) + + :param only_int_points: bool + If True it only updates the integration points. (Default=False) + """ + + if not only_int_points: + mu_g_X, var_g_X = self.predictive_posterior_GP(self.X, points="X") + self.mu_g_X = mu_g_X + self.mu_g2_X = var_g_X + mu_g_X**2 + mu_g_int_points, var_g_int_points = self.predictive_posterior_GP( + self.integration_points, points="int_points" + ) + self.mu_g_int_points = mu_g_int_points + self.mu_g2_int_points = var_g_int_points + mu_g_int_points**2 + + def predictive_intensity_function(self, X_eval): + """Computes the predictive intensity function at X_eval by Gaussian + quadrature. + + :param X_eval: numpy.ndarray [num_points_eval x D] + Points where the intensity function should be evaluated. + + :returns: + numpy.ndarray [num_points]: mean of predictive posterior intensity + numpy.ndarray [num_points]: variance of predictive posterior + intensity + """ + num_preds = X_eval.shape[0] + mu_pred, var_pred = self.predictive_posterior_GP(X_eval) + + mean_lmbda_pred, var_lmbda_pred = numpy.empty(num_preds), numpy.empty(num_preds) + + mean_lmbda_q1 = self.lmbda_star_q1 + var_lmbda_q1 = self.alpha_q1 / (self.beta_q1**2) + mean_lmbda_q1_squared = var_lmbda_q1 + mean_lmbda_q1**2 + + for ipred in range(num_preds): + mu, std = mu_pred[ipred], numpy.sqrt(var_pred[ipred]) + func1 = ( + lambda g_pred: 1.0 + / (1.0 + numpy.exp(-g_pred)) + * numpy.exp(-0.5 * (g_pred - mu) ** 2 / std**2) + / numpy.sqrt(2.0 * numpy.pi * std**2) + ) + a, b = mu - 10.0 * std, mu + 10.0 * std + mean_lmbda_pred[ipred] = ( + mean_lmbda_q1 * quadrature(func1, a, b, maxiter=500)[0] + ) + func2 = ( + lambda g_pred: (1.0 / (1.0 + numpy.exp(-g_pred))) ** 2 + * numpy.exp(-0.5 * (g_pred - mu) ** 2 / std**2) + / numpy.sqrt(2.0 * numpy.pi * std**2) + ) + a, b = mu - 10.0 * std, mu + 10.0 * std + mean_lmbda_pred_squared = ( + mean_lmbda_q1_squared * quadrature(func2, a, b, maxiter=500)[0] + ) + var_lmbda_pred[ipred] = ( + mean_lmbda_pred_squared - mean_lmbda_pred[ipred] ** 2 + ) + + return mean_lmbda_pred, var_lmbda_pred + + def sample_posterior(self, X_test, num_samples=1): + """Samples log predictive likelihood for test set from posterior. + + :param X_test: [num_X_test x D] + Observations in test set. 
+ :param num_samples: int + How many samples of the intensity function should be drawn from + the posterior. (Default=1e4) + + :return: numpy.ndarray [num_samples] + Returns the array of sampled likelihoods. + """ + + num_events = X_test.shape[0] + num_samples = int(num_samples) + X = numpy.concatenate([X_test, self.integration_points]) + K = self.cov_func(X, X) + kx = self.cov_func(X, self.induced_points) + kappa = kx.dot(self.Ks_inv) + Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T)) + mu_post = kappa.dot(self.mu_g_s) + L_post = numpy.linalg.cholesky( + Sigma_post + self.noise * numpy.eye(Sigma_post.shape[0]) + ) + + num_points = X.shape[0] + num_hundreds = int(num_samples) + pred_log_likelihood = numpy.empty([num_samples]) + + samples = [] + # samples hundred instances at a time + for ihundreds in range(num_hundreds): + rand_nums = numpy.random.randn(num_points, 1) + g_sample = mu_post[:, None] + L_post.dot(rand_nums) + lmbda_max_sample = numpy.random.gamma( + shape=self.alpha_q1, scale=1.0 / self.beta_q1, size=1 + ) + lmbda_sample = lmbda_max_sample / (1.0 + numpy.exp(-g_sample)) + samples.append(lmbda_sample) + return samples + + def predictive_log_likelihood(self, X_test, num_samples=1e4): + """Samples log predictive likelihood for test set from posterior. + + :param X_test: [num_X_test x D] + Observations in test set. + :param num_samples: int + How many samples of the intensity function should be drawn from + the posterior. (Default=1e4) + + :return: numpy.ndarray [num_samples] + Returns the array of sampled likelihoods. + """ + + num_events = X_test.shape[0] + num_samples = int(num_samples) + X = numpy.concatenate([X_test, self.integration_points]) + K = self.cov_func(X, X) + kx = self.cov_func(X, self.induced_points) + kappa = kx.dot(self.Ks_inv) + Sigma_post = K - kappa.dot(kx.T - self.Sigma_g_s.dot(kappa.T)) + mu_post = kappa.dot(self.mu_g_s) + L_post = numpy.linalg.cholesky( + Sigma_post + self.noise * numpy.eye(Sigma_post.shape[0]) + ) + + num_points = X.shape[0] + num_hundreds = int(num_samples / 1e2) + pred_log_likelihood = numpy.empty([num_samples]) + + # samples hundred instances at a time + for ihundreds in range(num_hundreds): + rand_nums = numpy.random.randn(num_points, 100) + g_sample = mu_post[:, None] + L_post.dot(rand_nums) + lmbda_max_sample = numpy.random.gamma( + shape=self.alpha_q1, scale=1.0 / self.beta_q1, size=100 + ) + lmbda_sample = lmbda_max_sample / (1.0 + numpy.exp(-g_sample)) + + pred_log_likelihood[ihundreds * 100 : (ihundreds + 1) * 100] = numpy.sum( + numpy.log(lmbda_sample[:num_events]), axis=0 + ) + pred_log_likelihood[ihundreds * 100 : (ihundreds + 1) * 100] -= ( + numpy.mean(lmbda_sample[num_events:], axis=0) * self.R + ) + + return pred_log_likelihood + + def expanded_predictive_log_likelihood(self, X_test): + """Fast approximation for log predictive test likelihood (Eq. 33 in + paper). + + :param X_test: [num_X_test x D] + Observations in test set. + + :return: float + Approximation of log predictive test likelihood. 
+ """ + self.update_predictive_posterior(only_int_points=True) + N = X_test.shape[0] + ks_x_test = self.cov_func(self.induced_points, X_test) + mu_g_X_test = ks_x_test.T.dot(self.Ks_inv.dot(self.mu_g_s)) + u_mean = ( + -self.lmbda_star_q1 + * numpy.mean(1.0 / (1.0 + numpy.exp(-self.mu_g_int_points))) + * self.R + - numpy.sum(numpy.log(1.0 + numpy.exp(-mu_g_X_test))) + + N * numpy.log(self.lmbda_star_q1) + ) + + log_pred_likelihood = u_mean + du_dg = numpy.empty(N + self.num_integration_points) + du_dg[:N] = 1.0 / (1.0 + numpy.exp(mu_g_X_test)) + du_dg[N:] = ( + -self.lmbda_star_q1 + / (1.0 + numpy.exp(-self.mu_g_int_points)) + * (1.0 - 1.0 / (1.0 + numpy.exp(-self.mu_g_int_points))) + / self.num_integration_points + * self.R + ) + du_dg2 = numpy.empty(N + self.num_integration_points) + du_dg2[:N] = -(1.0 - 1.0 / (1.0 + numpy.exp(mu_g_X_test))) / ( + 1.0 + numpy.exp(mu_g_X_test) + ) + du_dg2[N:] = ( + -self.lmbda_star_q1 + / (1.0 + numpy.exp(-self.mu_g_int_points)) + * (1.0 - 1.0 / (1.0 + numpy.exp(-self.mu_g_int_points))) + * (1.0 - 2.0 / (1.0 + numpy.exp(-self.mu_g_int_points))) + / self.num_integration_points + * self.R + ) + + du_dlambda = ( + -self.R * numpy.mean(1.0 / (1.0 + numpy.exp(-self.mu_g_int_points))) + + N / self.lmbda_star_q1 + ) + du_dlmbda2 = -N / self.lmbda_star_q1**2 + + C = numpy.empty( + [N + self.num_integration_points, N + self.num_integration_points] + ) + inner_matrix = self.Ks_inv.dot( + numpy.identity(self.num_inducing_points) - self.Sigma_g_s.dot(self.Ks_inv) + ) + + K_X = self.cov_func(X_test, X_test) + self.noise * numpy.identity( + X_test.shape[0] + ) + + C[:N, :N] = K_X - ks_x_test.T.dot(inner_matrix.dot(ks_x_test)) + del K_X + K_int_points = self.cov_func( + self.integration_points, self.integration_points + ) + self.noise * numpy.identity(self.integration_points.shape[0]) + + C[N:, N:] = K_int_points - self.ks_int_points.T.dot( + inner_matrix.dot(self.ks_int_points) + ) + del K_int_points + + K_X_int_points = self.cov_func(self.integration_points, X_test) + C[N:, :N] = K_X_int_points - self.ks_int_points.T.dot( + inner_matrix.dot(ks_x_test) + ) + del K_X_int_points + + C[:N, N:] = C[N:, :N].T + + log_pred_likelihood_corr = ( + 0.5 * numpy.trace(C.dot(numpy.diag(du_dg2) + numpy.outer(du_dg, du_dg))) + + 0.5 * (du_dlmbda2 + du_dlambda**2) * self.alpha_q1 / self.beta_q1**2 + ) + log_pred_likelihood += log_pred_likelihood_corr + + return log_pred_likelihood diff --git a/stpy/borel_set.py b/stpy/borel_set.py index 2760ffc..36c4fe5 100644 --- a/stpy/borel_set.py +++ b/stpy/borel_set.py @@ -6,299 +6,312 @@ from stpy.helpers.helper import cartesian -class BorelSet(): - - def __init__(self, d, bounds): - self.d = d - self.bounds = bounds - self.calculate_volume() - self.type = "box" - - def description(self): - return self.bounds - - def calculate_volume(self): - self.vol = 1. - for i in range(self.d): - self.vol = self.vol * (self.bounds[i, 1] - self.bounds[i, 0]) - - def volume(self): - return self.vol - - def center_point(self): - return (self.bounds[:, 1] + self.bounds[:, 0]) / 2 - - def perimeter(self): - cir = 0. 
- for i in range(self.d): - cir += 2 * (self.bounds[i, 1] - self.bounds[i, 0]) - return cir - - def uniform_sample(self, n): - sample = torch.zeros(n, self.d).double() - for i in range(self.d): - sample_i = torch.from_numpy(np.random.uniform(self.bounds[i, 0], self.bounds[i, 1], n)) - sample[:, i] = sample_i - return sample - - def return_legendre_discretization(self, n): - nodes, weights = np.polynomial.legendre.leggauss(n) - nodes_arr = [] - weights_arr = [] - for i in range(self.d): - a, b = float(self.bounds[i, 0]), float(self.bounds[i, 1]) - nodes = nodes * (b - a) / 2. + (a + b) / 2. - nodes_arr.append(nodes) - weights_arr.append(weights * 0.5 * (b - a)) - - nodes = cartesian(nodes_arr) - weights = cartesian(weights_arr) - return torch.prod(torch.from_numpy(weights), dim=1), torch.from_numpy(nodes) - - def return_discretization(self, n, offsets=None): - dis = [] - for i in range(self.d): - if offsets is None: - x = np.linspace(self.bounds[i, 0], self.bounds[i, 1], n) - else: - x = np.linspace(self.bounds[i, 0] - offsets[i], self.bounds[i, 1] + offsets[i], n) - dis.append(x) - r = cartesian(dis) - r = torch.from_numpy(r) - return r - - def inside(self, set): - """ - Tests if set is inside this set - :param set: - :return: - """ - for i in range(self.d): - if self.bounds[i, 0] > set.bounds[i, 0] or self.bounds[i, 1] < set.bounds[i, 1]: - return False - return True - - def is_inside(self, x): - """ - :param x: (n,d) to check if a<=x set.bounds[i, 0] + or self.bounds[i, 1] < set.bounds[i, 1] + ): + return False + return True + + def is_inside(self, x): + """ + :param x: (n,d) to check if a<=x self.center[i] - self.radius or set.bounds[i, 1] < self.center[i] - self.radius: - return False - ## set is round - else: - if (self.center - set.center) ** 2 > self.radius ** 2: - return False - return True - - def is_inside(self, x): - """ - :param x: (n,d) to check if a<=x= (x - torch.tile(self.center, (n, 1))) ** 2 - return mask + def __init__(self, d, center, radius): + self.d = d + self.center = center + self.radius = radius + self.calculate_volume() + self.type = "round" + + def calculate_volume(self): + self.vol = ( + (self.radius**self.d) + * np.pi ** (self.d // 2) + / (scipy.special.gamma(self.d // 2 + 1)) + ) + + def description(self): + return self.center, self.radius + + def return_discretization(self, n): + if self.d == 1: + dis = [] + x = np.linspace(self.center - self.radius, self.center + self.radius, n) + dis.append(x) + r = cartesian(dis) + r = torch.from_numpy(r) + return r + + elif self.d == 2: + + p, w = np.polynomial.legendre.leggauss(n) + mu = np.arange(1, n + 1) + points = np.array( + [ + np.tile(self.radius * np.cos(mu * np.pi / (n + 1)), n), + np.outer(p, self.radius * np.sin(mu * np.pi / (n + 1))).flatten(), + ] + ).T + points[:, 0] += float(self.center[0]) + points[:, 1] += float(self.center[1]) + + # k = n - 2 + # theta = 2 * np.pi * np.arange(1, k + 2) / (k + 1) + # p, w = np.polynomial.legendre.leggauss(n + 1) + # # scale points to [r0, r1] (where r0 = 0, r1 = 1 for now) + # p = np.sqrt(0.5 * (p + 1.0)) + # p_theta = np.dstack(np.meshgrid(p, theta)).reshape(-1, 2).T + # points = np.array( + # [p_theta[0] * self.radius * np.cos(p_theta[1]), p_theta[0] * self.radius * np.sin(p_theta[1])] + # ).T + # points[:,0] += float(self.center[0]) + # points[:,1] += float(self.center[1]) + # + # points = np.concatenate((points,self.center.view(-1,self.d).numpy())) + + return torch.from_numpy(points) + + def return_legendre_discretization(self, n): + if self.d == 2: + p, w = 
np.polynomial.legendre.leggauss(n) + mu = np.arange(1, n + 1) + points = np.array( + [ + np.tile(self.radius * np.cos(mu * np.pi / (n + 1)), n), + np.outer(p, self.radius * np.sin(mu * np.pi / (n + 1))).flatten(), + ] + ).T + points[:, 0] += float(self.center[0]) + points[:, 1] += float(self.center[1]) + weights = np.outer(w, np.sin(mu * np.pi / (n + 1)) ** 2).flatten() / (n + 1) + return torch.from_numpy(weights), torch.from_numpy(points) + else: + raise AssertionError("Wrong type of set considered.") + + def inside(self, set): + """ + Tests if set is inside this set + :param set: + :return: + """ + + ## the tested set is box + if set.type == "box": + for i in range(self.d): + if ( + set.bounds[i, 0] > self.center[i] - self.radius + or set.bounds[i, 1] < self.center[i] - self.radius + ): + return False + ## set is round + else: + if (self.center - set.center) ** 2 > self.radius**2: + return False + return True + + def is_inside(self, x): + """ + :param x: (n,d) to check if a<=x= (x - torch.tile(self.center, (n, 1))) ** 2 + return mask class Node(BorelSet): - def __init__(self, d, bounds, parent): - super().__init__(d, bounds) - self.left = None - self.right = None - self.children = None - self.parent = parent - - if self.parent is None: - self.level = 1 - else: - self.level = parent.level + 1 - - -class HierarchicalBorelSets(): - - def __init__(self, d, interval, levels): - if d == 1: - self.top_node = Node(d, torch.Tensor([interval]), None) - elif d == 2: - self.top_node = Node(d, torch.Tensor(interval), None) - - self.Sets = [self.top_node] - self.levels = levels - if d == 1: - self.construct_1d(interval, levels, self.Sets, self.top_node) - else: - self.construct_2d(self.top_node.bounds, levels, self.Sets, self.top_node) - self.d = d - - def get_parent_set(self): - return self.top_node - - def get_sets_level(self, l): - out = [] - for s in self.Sets: - if s.level == l: - out.append(s) - return out - - def get_all_sets(self): - return self.Sets - - def get_ball_coverings(self, n, radius='auto'): - D = self.get_parent_set() - centers = D.return_discretization(n) - n = centers.size()[0] - sets = [] - for i in range(n): - if radius == 'auto': - sets.append(BallSet(D.d, centers[i, :], 2. / n)) - else: - sets.append(BallSet(D.d, centers[i, :], radius)) - return sets - - def construct_1d(self, interval, levels, S, parent): - - if levels > 1: - a, b = interval - c = (a + b) / 2. - - S_1 = Node(1, torch.Tensor([[a, c]]), parent) - S_2 = Node(1, torch.Tensor([[c, b]]), parent) - - parent.left = S_1 - parent.right = S_2 - - S.append(S_1) - self.construct_1d((a, c), levels - 1, S, S_1) - S.append(S_2) - self.construct_1d((c, b), levels - 1, S, S_2) - - else: - return None - - def construct_2d(self, interval, levels, S, parent): - if levels > 1: - xa = interval[0, 0] - xb = interval[0, 1] - ya = interval[1, 0] - yb = interval[1, 1] - - midx = xa + (xb - xa) / 2. - midy = ya + (yb - ya) / 2. 
- - S1 = Node(2, torch.Tensor([[xa, midx], [ya, midy]]), parent) - S2 = Node(2, torch.Tensor([[xa, midx], [midy, yb]]), parent) - S3 = Node(2, torch.Tensor([[midx, xb], [ya, midy]]), parent) - S4 = Node(2, torch.Tensor([[midx, xb], [midy, yb]]), parent) - - parent.children = [S1, S2, S3, S4] - - for child in parent.children: - S.append(child) - self.construct_2d(child.bounds, levels - 1, S, child) - else: - return None + def __init__(self, d, bounds, parent): + super().__init__(d, bounds) + self.left = None + self.right = None + self.children = None + self.parent = parent + + if self.parent is None: + self.level = 1 + else: + self.level = parent.level + 1 + + +class HierarchicalBorelSets: + + def __init__(self, d, interval, levels): + if d == 1: + self.top_node = Node(d, torch.Tensor([interval]), None) + elif d == 2: + self.top_node = Node(d, torch.Tensor(interval), None) + + self.Sets = [self.top_node] + self.levels = levels + if d == 1: + self.construct_1d(interval, levels, self.Sets, self.top_node) + else: + self.construct_2d(self.top_node.bounds, levels, self.Sets, self.top_node) + self.d = d + + def get_parent_set(self): + return self.top_node + + def get_sets_level(self, l): + out = [] + for s in self.Sets: + if s.level == l: + out.append(s) + return out + + def get_all_sets(self): + return self.Sets + + def get_ball_coverings(self, n, radius="auto"): + D = self.get_parent_set() + centers = D.return_discretization(n) + n = centers.size()[0] + sets = [] + for i in range(n): + if radius == "auto": + sets.append(BallSet(D.d, centers[i, :], 2.0 / n)) + else: + sets.append(BallSet(D.d, centers[i, :], radius)) + return sets + + def construct_1d(self, interval, levels, S, parent): + + if levels > 1: + a, b = interval + c = (a + b) / 2.0 + + S_1 = Node(1, torch.Tensor([[a, c]]), parent) + S_2 = Node(1, torch.Tensor([[c, b]]), parent) + + parent.left = S_1 + parent.right = S_2 + + S.append(S_1) + self.construct_1d((a, c), levels - 1, S, S_1) + S.append(S_2) + self.construct_1d((c, b), levels - 1, S, S_2) + + else: + return None + + def construct_2d(self, interval, levels, S, parent): + if levels > 1: + xa = interval[0, 0] + xb = interval[0, 1] + ya = interval[1, 0] + yb = interval[1, 1] + + midx = xa + (xb - xa) / 2.0 + midy = ya + (yb - ya) / 2.0 + + S1 = Node(2, torch.Tensor([[xa, midx], [ya, midy]]), parent) + S2 = Node(2, torch.Tensor([[xa, midx], [midy, yb]]), parent) + S3 = Node(2, torch.Tensor([[midx, xb], [ya, midy]]), parent) + S4 = Node(2, torch.Tensor([[midx, xb], [midy, yb]]), parent) + + parent.children = [S1, S2, S3, S4] + + for child in parent.children: + S.append(child) + self.construct_2d(child.bounds, levels - 1, S, child) + else: + return None if __name__ == "__main__": - center = torch.Tensor([0.5, 0.5]).double() - radius = 0.1 - d = 2 - B = BallSet(d, center, radius) - - weights, xtest = B.return_legendre_discretization(10) - xtest2 = B.return_discretization(10) - print(torch.sum(weights)) - plt.plot(xtest[:, 0], xtest[:, 1], 'ko') - plt.plot(xtest2[:, 0], xtest2[:, 1], 'ro') - plt.show() + center = torch.Tensor([0.5, 0.5]).double() + radius = 0.1 + d = 2 + B = BallSet(d, center, radius) + + weights, xtest = B.return_legendre_discretization(10) + xtest2 = B.return_discretization(10) + print(torch.sum(weights)) + plt.plot(xtest[:, 0], xtest[:, 1], "ko") + plt.plot(xtest2[:, 0], xtest2[:, 1], "ro") + plt.show() diff --git a/stpy/candidate_set.py b/stpy/candidate_set.py index 663a2aa..e3333e7 100644 --- a/stpy/candidate_set.py +++ b/stpy/candidate_set.py @@ -2,60 +2,61 @@ 
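As a side note on the borel_set.py module reformatted here: the Legendre discretization returned by BorelSet can be used directly for numerical integration over a box. A small self-contained check, with an integrand and box chosen purely for illustration:

    import torch
    from stpy.borel_set import BorelSet

    # Gauss-Legendre nodes and product weights over the box [-1, 1]^2.
    B = BorelSet(d=2, bounds=torch.Tensor([[-1.0, 1.0], [-1.0, 1.0]]).double())
    w, nodes = B.return_legendre_discretization(16)

    f = lambda x: x[:, 0] ** 2 + x[:, 1] ** 2      # integrand
    approx = torch.sum(w * f(nodes))               # exact value is 8/3
    print(float(approx), float(B.volume()))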
import torch -class CandidateSet(): +class CandidateSet: + + def __init__(self): + pass - def __init__(self): - pass class CandidateDiscreteSet(CandidateSet): - def __init__(self, xtest): - super().__init__() - self.xtest = xtest - self.embedded = False - - def get_set_size(self): - return self.xtest.size()[0] - - def get_dim(self): - return self.xtest.size()[1] - - def get_emb_dim(self): - if self.embedded: - return self.emb_xtest.size()[1] - else: - return self.xtest.size()[1] - - def get_random_elements(self, size = 1): - n = self.get_set_size() - indices = np.random.choice(np.arange(0,n,1), size) - print (indices) - if self.embedded: - elem = self.emb_xtest[indices, :] - else: - elem = self.xtest[indices,:] - print (elem) - return elem - - def debug_subsample(self): - self.xtest = self.xtest[0:20000,:] - - def get_options_per_dim(self): - d = {} - dims = self.get_dim() - for i in range(dims): - d[i] = torch.unique(self.xtest[:,i]) - return d - - def get_options(self): - if self.embedded: - return self.emb_xtest - else: - return self.xtest - - def get_options_raw(self): - return self.xtest - - def use_embedding(self, embed): - self.embedded = True - self.emb_xtest = embed(self.xtest) + def __init__(self, xtest): + super().__init__() + self.xtest = xtest + self.embedded = False + + def get_set_size(self): + return self.xtest.size()[0] + + def get_dim(self): + return self.xtest.size()[1] + + def get_emb_dim(self): + if self.embedded: + return self.emb_xtest.size()[1] + else: + return self.xtest.size()[1] + + def get_random_elements(self, size=1): + n = self.get_set_size() + indices = np.random.choice(np.arange(0, n, 1), size) + print(indices) + if self.embedded: + elem = self.emb_xtest[indices, :] + else: + elem = self.xtest[indices, :] + print(elem) + return elem + + def debug_subsample(self): + self.xtest = self.xtest[0:20000, :] + + def get_options_per_dim(self): + d = {} + dims = self.get_dim() + for i in range(dims): + d[i] = torch.unique(self.xtest[:, i]) + return d + + def get_options(self): + if self.embedded: + return self.emb_xtest + else: + return self.xtest + + def get_options_raw(self): + return self.xtest + + def use_embedding(self, embed): + self.embedded = True + self.emb_xtest = embed(self.xtest) diff --git a/stpy/continuous_processes/categorical_mixture.py b/stpy/continuous_processes/categorical_mixture.py index c22bff8..8dc3689 100755 --- a/stpy/continuous_processes/categorical_mixture.py +++ b/stpy/continuous_processes/categorical_mixture.py @@ -8,179 +8,188 @@ class CategoricalMixture(GaussianProcess): - def __init__(self, processes, init_weights=None, d=1, bounds=None): - if init_weights is None: - self.k = len(processes) - init_weights = torch.ones(size=(self.k, 1)).view(-1).double() * 1. / float(self.k) - else: - self.k = len(processes) - - if len(processes) != init_weights.shape[0]: - raise AssertionError("Not the same number") - - self.processes = processes - self.bounds = bounds - self.beta = 2. 
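The CandidateDiscreteSet class reformatted above is a thin wrapper around a fixed grid of candidate points. A hypothetical usage sketch (the quadratic embedding map is invented for illustration):

    import torch
    from stpy.candidate_set import CandidateDiscreteSet

    xtest = torch.linspace(-1, 1, 64).view(-1, 1).double()
    C = CandidateDiscreteSet(xtest)
    print(C.get_set_size(), C.get_dim())            # 64, 1

    # Work in an embedded space; the embedding map here is arbitrary.
    C.use_embedding(lambda x: torch.cat([x, x ** 2], dim=1))
    print(C.get_emb_dim())                          # 2
    elems = C.get_random_elements(size=5)           # 5 random embedded candidates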
- self.d = d - self.x = None - self.y = None - self.init_weights = init_weights - if torch.sum(self.init_weights) > 1.: - self.init_weights = self.init_weights / torch.sum(self.init_weights) - self.weights = self.init_weights - - def add_data_point(self, x, y): - for model in self.processes: - model.add_data_point(x, y) - - def log_prob_normal(self, K, y): - Knumpy = K.detach().numpy() - ynumpy = y.detach().numpy() - - decomp = scipy.linalg.lu_factor(Knumpy) - alpha = scipy.linalg.lu_solve(decomp, ynumpy) - - logprob = -0.5 * ynumpy.T.dot(alpha) - 0.5 * np.linalg.slogdet(Knumpy)[1] - 0.5 * ynumpy.shape[0] * np.log( - 2 * np.pi) - - return float(logprob) - - def fit_gp(self, x, y, iterative=False): - self.x = x - self.y = y - - logprobs = torch.zeros(size=(self.k, 1)).view(-1).double() - - for j in range(self.k): - GP = self.processes[j] - GP.fit(x, y) - K = GP.get_kernel() - logprobs[j] = self.log_prob_normal(K, y) - - # print("Neg. log likelihood vector:", -logprobs) - - log_init_prob = torch.log(self.init_weights) - log_posterior = log_init_prob + logprobs - log_evidence = torch.logsumexp(log_posterior, dim=0) - self.weights = torch.exp(log_posterior - log_evidence) - - # print ("Categorical Probability: ",self.weights) - # print ("---------------------------------") - - self.fit = True - return True - - def mean_std(self, xtest): - mu = torch.zeros(size=(xtest.size()[0], 1)).double() - s = torch.zeros(size=(xtest.size()[0], 1)).double() - for j in range(self.k): - (a1, a2) = self.processes[j].mean_std(xtest) - - mu = mu + self.weights[j] * a1 - s = s + self.weights[j] * a2 ** 2 - s = torch.sqrt(s) - return (mu, s) - - def sample(self, xtest, size=1, with_mask=False): - # sample a GP - k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten()) - mask = [k] - if self.fit == True: - self.processes[k].fit(self.x, self.y) - samples = self.processes[k].sample(xtest, size=1) - else: - samples = self.processes[k].sample(xtest, size=1) - - for s in range(size - 1): - k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten()) - mask.append(k) - if self.fit == True: - self.processes[k].fit(self.x, self.y) - sample = self.processes[k].sample(xtest, size=1) - samples = torch.cat((samples, sample), dim=1) - else: - sample = self.processes[k].sample(xtest, size=1) - samples = torch.cat((samples, sample), dim=1) - if with_mask == True: - return (samples, mask) - else: - return samples + def __init__(self, processes, init_weights=None, d=1, bounds=None): + if init_weights is None: + self.k = len(processes) + init_weights = ( + torch.ones(size=(self.k, 1)).view(-1).double() * 1.0 / float(self.k) + ) + else: + self.k = len(processes) + + if len(processes) != init_weights.shape[0]: + raise AssertionError("Not the same number") + + self.processes = processes + self.bounds = bounds + self.beta = 2.0 + self.d = d + self.x = None + self.y = None + self.init_weights = init_weights + if torch.sum(self.init_weights) > 1.0: + self.init_weights = self.init_weights / torch.sum(self.init_weights) + self.weights = self.init_weights + + def add_data_point(self, x, y): + for model in self.processes: + model.add_data_point(x, y) + + def log_prob_normal(self, K, y): + Knumpy = K.detach().numpy() + ynumpy = y.detach().numpy() + + decomp = scipy.linalg.lu_factor(Knumpy) + alpha = scipy.linalg.lu_solve(decomp, ynumpy) + + logprob = ( + -0.5 * ynumpy.T.dot(alpha) + - 0.5 * np.linalg.slogdet(Knumpy)[1] + - 0.5 * ynumpy.shape[0] * np.log(2 * np.pi) + ) + + return float(logprob) + + def fit_gp(self, x, 
y, iterative=False): + self.x = x + self.y = y + + logprobs = torch.zeros(size=(self.k, 1)).view(-1).double() + + for j in range(self.k): + GP = self.processes[j] + GP.fit(x, y) + K = GP.get_kernel() + logprobs[j] = self.log_prob_normal(K, y) + + # print("Neg. log likelihood vector:", -logprobs) + + log_init_prob = torch.log(self.init_weights) + log_posterior = log_init_prob + logprobs + log_evidence = torch.logsumexp(log_posterior, dim=0) + self.weights = torch.exp(log_posterior - log_evidence) + + # print ("Categorical Probability: ",self.weights) + # print ("---------------------------------") + + self.fit = True + return True + + def mean_std(self, xtest): + mu = torch.zeros(size=(xtest.size()[0], 1)).double() + s = torch.zeros(size=(xtest.size()[0], 1)).double() + for j in range(self.k): + (a1, a2) = self.processes[j].mean_std(xtest) + + mu = mu + self.weights[j] * a1 + s = s + self.weights[j] * a2**2 + s = torch.sqrt(s) + return (mu, s) + + def sample(self, xtest, size=1, with_mask=False): + # sample a GP + k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten()) + mask = [k] + if self.fit == True: + self.processes[k].fit(self.x, self.y) + samples = self.processes[k].sample(xtest, size=1) + else: + samples = self.processes[k].sample(xtest, size=1) + + for s in range(size - 1): + k = np.random.choice(np.arange(0, self.k, 1), p=self.weights.flatten()) + mask.append(k) + if self.fit == True: + self.processes[k].fit(self.x, self.y) + sample = self.processes[k].sample(xtest, size=1) + samples = torch.cat((samples, sample), dim=1) + else: + sample = self.processes[k].sample(xtest, size=1) + samples = torch.cat((samples, sample), dim=1) + if with_mask == True: + return (samples, mask) + else: + return samples if __name__ == "__main__": - # domain size - L_infinity_ball = 5 - # dimension - d = 1 - # error variance - s = 0.001 - # grid density - n = 512 - # number of intial points - N = 15 - - # model - # GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma = 1.5, diameter=L_infinity_ball) - GP1 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.5) - GP2 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=1, gamma=0.7) - # GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1) - GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1) - GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.) 
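The weight update in CategoricalMixture.fit_gp above is a standard Bayesian model-averaging step: per-model log marginal likelihoods are added to the log prior weights and normalized in log space via logsumexp. A small numerical sketch of just that step (the log-evidence values are invented):

    import torch

    log_marginals = torch.tensor([-10.2, -9.7, -12.1]).double()   # per-model log evidence
    log_prior = torch.log(torch.ones(3).double() / 3.0)           # uniform prior over models

    log_post = log_prior + log_marginals
    weights = torch.exp(log_post - torch.logsumexp(log_post, dim=0))
    print(weights, float(weights.sum()))                           # weights sum to 1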
- - # data - # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball) - # GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa = 1) - GPTrue = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.1) - - # test environment - - d = 1 - from stpy.test_functions.benchmarks import GaussianProcessSample - - BenchmarkFunc = GaussianProcessSample(d=d, n=n, sigma=0., gamma=0.2, name="squared_exponential") - x = BenchmarkFunc.initial_guess(N) - xtest = BenchmarkFunc.interval(n) - BenchmarkFunc.optimize(xtest, s) - gamma = BenchmarkFunc.bandwidth() - bounds = BenchmarkFunc.bounds() - BenchmarkFunc.scale_max(xtest=xtest) - F = lambda x: BenchmarkFunc.eval(x, sigma=s) - - # targets - y = F(x) - GPs = [GP1, GP2, GP3, GP4] - # Mix = CategoricalMixture(GPs,init_weights=np.array([0.01,0.01,0.98])) - Mix = CategoricalMixture(GPs) - - for j in range(N): - plt.figure(1) - plt.clf() - X = x[0:j + 1, :].reshape(-1, 1) - y = F(X) - Mix.fit_gp(X, y) - (mu, var) = Mix.mean_std(xtest) - samples = Mix.sample(xtest, size=5) - f = F(xtest).numpy() - mu = mu.numpy() - var = var.numpy() - samples = samples.numpy() - xtest2 = xtest.numpy() - - plt.plot(xtest2, samples, '--', linewidth=2, alpha=0.3) - plt.plot(xtest2, mu, 'k', linewidth=3) - plt.plot(xtest2, mu, 'k', linewidth=3) - plt.fill_between(xtest2.flat, (mu - 2 * var).flat, (mu + 2 * var).flat, color="#dddddd") - plt.plot(X, y, 'ro', markersize=10) - plt.plot(xtest2, f, 'g', linewidth=3) - plt.draw() - - plt.figure(2) - plt.clf() - plt.title("Probability of Category") - plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs)) * 0.5) - plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30) - plt.subplots_adjust(bottom=0.35) - plt.plot() - plt.show() + # domain size + L_infinity_ball = 5 + # dimension + d = 1 + # error variance + s = 0.001 + # grid density + n = 512 + # number of intial points + N = 15 + + # model + # GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma = 1.5, diameter=L_infinity_ball) + GP1 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.5) + GP2 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=1, gamma=0.7) + # GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1) + GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1) + GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.0) + + # data + # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball) + # GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa = 1) + GPTrue = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.1) + + # test environment + + d = 1 + from stpy.test_functions.benchmarks import GaussianProcessSample + + BenchmarkFunc = GaussianProcessSample( + d=d, n=n, sigma=0.0, gamma=0.2, name="squared_exponential" + ) + x = BenchmarkFunc.initial_guess(N) + xtest = BenchmarkFunc.interval(n) + BenchmarkFunc.optimize(xtest, s) + gamma = BenchmarkFunc.bandwidth() + bounds = BenchmarkFunc.bounds() + BenchmarkFunc.scale_max(xtest=xtest) + F = lambda x: BenchmarkFunc.eval(x, sigma=s) + + # targets + y = F(x) + GPs = [GP1, GP2, GP3, GP4] + # Mix = CategoricalMixture(GPs,init_weights=np.array([0.01,0.01,0.98])) + Mix = CategoricalMixture(GPs) + + for j in range(N): + plt.figure(1) + plt.clf() + X = x[0 : j + 1, :].reshape(-1, 1) + y = F(X) + Mix.fit_gp(X, y) + (mu, var) = Mix.mean_std(xtest) + samples = Mix.sample(xtest, size=5) + f = 
F(xtest).numpy() + mu = mu.numpy() + var = var.numpy() + samples = samples.numpy() + xtest2 = xtest.numpy() + + plt.plot(xtest2, samples, "--", linewidth=2, alpha=0.3) + plt.plot(xtest2, mu, "k", linewidth=3) + plt.plot(xtest2, mu, "k", linewidth=3) + plt.fill_between( + xtest2.flat, (mu - 2 * var).flat, (mu + 2 * var).flat, color="#dddddd" + ) + plt.plot(X, y, "ro", markersize=10) + plt.plot(xtest2, f, "g", linewidth=3) + plt.draw() + + plt.figure(2) + plt.clf() + plt.title("Probability of Category") + plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs)) * 0.5) + plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30) + plt.subplots_adjust(bottom=0.35) + plt.plot() + plt.show() # plt.pause(4) diff --git a/stpy/continuous_processes/convex_rkhs.py b/stpy/continuous_processes/convex_rkhs.py index 30f3775..a3df5e1 100644 --- a/stpy/continuous_processes/convex_rkhs.py +++ b/stpy/continuous_processes/convex_rkhs.py @@ -2,17 +2,21 @@ import torch from torchmin import minimize from stpy.candidate_set import CandidateDiscreteSet -from stpy.generative_models.conditional_generative_model import ConditionalGenerativeModel +from stpy.generative_models.conditional_generative_model import ( + ConditionalGenerativeModel, +) + + class ConvexRKHS(KernelizedFeatures): - """ - """ + """ """ - def __init__(self, embedding, m, lam = 0. , s = 0.01): + def __init__(self, embedding, m, lam=0.0, s=0.01): super().__init__(embedding, m) self.Gamma = torch.eye(m, requires_grad=True).double() self.lam = lam self.s = s - def fit(self,x=None,y=None): + + def fit(self, x=None, y=None): """ legacy method :param x: @@ -20,32 +24,53 @@ def fit(self,x=None,y=None): :return: """ pass + def weight_scaling(self, Gamma, scale, x_single, y, Phi): x = torch.tile(x_single, (y.size()[0], 1)) - return torch.exp(-torch.sum(((Phi(x) - Phi(y)) @ Gamma /scale) ** 2, axis=1)) + return torch.exp(-torch.sum(((Phi(x) - Phi(y)) @ Gamma / scale) ** 2, axis=1)) def local_fit(self, weights): D = torch.diag(weights) X = self.embed(self.x) - theta = torch.linalg.inv((X.T @ D @ X) + self.lam * torch.eye(self.m)) @ X.T @ D @ self.y + theta = ( + torch.linalg.inv((X.T @ D @ X) + self.lam * torch.eye(self.m)) + @ X.T + @ D + @ self.y + ) return theta - def optimize_params(self, type='bandwidth', restarts=10, regularizer=None, - maxiter=1000, mingradnorm=1e-4, verbose=False, optimizer="pymanopt", scale=1., weight=1., save = False, - save_name = 'model.np', init_func = None, bounds = None, parallel = False, cores = None): + + def optimize_params( + self, + type="bandwidth", + restarts=10, + regularizer=None, + maxiter=1000, + mingradnorm=1e-4, + verbose=False, + optimizer="pymanopt", + scale=1.0, + weight=1.0, + save=False, + save_name="model.np", + init_func=None, + bounds=None, + parallel=False, + cores=None, + ): x_data = self.x y_data = self.y Phi = lambda x: self.embedding.embed(x) m = self.get_basis_size() - def total_loss(gamma): weights = [] predictions = [] for i in range(x_data.size()[0]): x = x_data[i] - Gamma = torch.diag(gamma) - w = self.weight_scaling(Gamma, 1., x, x_data, Phi) + Gamma = torch.diag(gamma) + w = self.weight_scaling(Gamma, 1.0, x, x_data, Phi) X = Phi(x_data) # local fit in the new coordinates @@ -61,21 +86,21 @@ def total_loss(gamma): for p1, w1 in zip(predictions, weights): # loss that makes sure we predict correctly - loss = 1* torch.sum(((p1 - y_data) ** 2)/(self.s**2) * (w1)) / 2 + loss = 1 * torch.sum(((p1 - y_data) ** 2) / (self.s**2) * (w1)) / 2 for p2, w2 in zip(predictions, weights): 
# loss that makes sure the predictions are consistent (this can be a larger set) - loss += 1* torch.sum((p1 - p2)**2/(self.s**2) * (w1 * w2)) + loss += 1 * torch.sum((p1 - p2) ** 2 / (self.s**2) * (w1 * w2)) - return loss + 0.001*torch.sum(gamma**2) + return loss + 0.001 * torch.sum(gamma**2) # optimize this vals = [] args = [] for _ in range(restarts): - gamma = torch.randn(m, requires_grad=True).double()**2 + gamma = torch.randn(m, requires_grad=True).double() ** 2 total_loss(gamma) - result = minimize(total_loss, gamma, method='bfgs', disp=2) + result = minimize(total_loss, gamma, method="bfgs", disp=2) vals.append(result.fun) args.append(result.x) @@ -83,11 +108,11 @@ def total_loss(gamma): def mean(self, xtest): phitest = self.embed(xtest) - out = torch.zeros(size = (phitest.size()[0],1)).double() + out = torch.zeros(size=(phitest.size()[0], 1)).double() for i, x in enumerate(xtest): - w = self.weight_scaling(self.Gamma, 1., x, self.x, self.embed) - out[i] = 0. - f = self.embed(x)@self.local_fit(w) + w = self.weight_scaling(self.Gamma, 1.0, x, self.x, self.embed) + out[i] = 0.0 + f = self.embed(x) @ self.local_fit(w) out[i] = f return out @@ -96,17 +121,17 @@ def best_points_so_far(self): get all points which are above max - 2*s :return: """ - conservative_best_value = torch.max(self.y) - 2*self.s + conservative_best_value = torch.max(self.y) - 2 * self.s mask = self.y > conservative_best_value - return self.x[mask,:] + return self.x[mask, :] - def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off = 0.01, size = 10): - if isinstance(CandidateDiscreteSet,candidate_set): + def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off=0.01, size=10): + if isinstance(CandidateDiscreteSet, candidate_set): xtest = self.embed(candidate_set.get_options_raw) - w = self.weight_scaling(self.Gamma, 1., x_loc,xtest, self.embed) + w = self.weight_scaling(self.Gamma, 1.0, x_loc, xtest, self.embed) selection = xtest[w > cut_off] max_v = selection.size()[0] - indices = np.random.choice(max_v, size = size) + indices = np.random.choice(max_v, size=size) out = selection[indices] return out elif isinstance(ConditionalGenerativeModel, candidate_set): @@ -115,7 +140,7 @@ def sample_neighbourhood_sample(self, x_loc, candidate_set, cut_off = 0.01, size NotImplementedError("The requested candidate set method is not implemented") def func_gradient(self, x): - w = self.weight_scaling(self.Gamma, 1., x, self.x, self.embed) + w = self.weight_scaling(self.Gamma, 1.0, x, self.x, self.embed) return self.local_fit(weights=w) @@ -129,23 +154,23 @@ def func_gradient(self, x): n = 256 N = 4 lam = 1e-6 - gamma_original = torch.randn(size = (embedding.get_m(),)).double() + gamma_original = torch.randn(size=(embedding.get_m(),)).double() xtest = interval_torch(d=1, n=n) - x = torch.zeros(size =(N,1)).double() + x = torch.zeros(size=(N, 1)).double() x = x.uniform_() Phi_original = lambda x: embedding.embed(x) @ torch.diag(gamma_original) Phi = lambda x: embedding.embed(x) y = torch.sum(Phi_original(x) ** 2, axis=1).view(-1) - ytest= torch.sum(Phi_original(xtest) ** 2, axis=1).view(-1) - Estimator = ConvexRKHS(embedding, embedding.get_m(), lam = lam ) - #Estimator = torch.compile(Estimator) + ytest = torch.sum(Phi_original(xtest) ** 2, axis=1).view(-1) + Estimator = ConvexRKHS(embedding, embedding.get_m(), lam=lam) + # Estimator = torch.compile(Estimator) Estimator.load_data((x, y)) Estimator.optimize_params() - print ("True gamma:",gamma_original) - print ("Optimized gamma:", torch.diag(Estimator.Gamma)) 
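ConvexRKHS.local_fit above amounts to a locally weighted ridge regression in feature space. A standalone sketch of the same estimate, written with a linear solve instead of an explicit inverse and with randomly generated stand-ins for the features, targets, and locality weights:

    import torch

    torch.manual_seed(0)
    n, m, lam = 30, 5, 1e-6
    Phi = torch.randn(n, m).double()       # stand-in for self.embed(self.x)
    y = torch.randn(n).double()
    w = torch.rand(n).double()             # locality weights, as from weight_scaling

    D = torch.diag(w)
    theta = torch.linalg.solve(Phi.T @ D @ Phi + lam * torch.eye(m).double(),
                               Phi.T @ D @ y)
    pred = Phi @ theta                     # locally weighted prediction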
+ print("True gamma:", gamma_original) + print("Optimized gamma:", torch.diag(Estimator.Gamma)) offset = 20 Phi = lambda x: embedding.embed(x) fig, ax1 = plt.subplots() @@ -153,24 +178,31 @@ def func_gradient(self, x): for i in range(xtest.size()[0]): x = xtest[i] - w = Estimator.weight_scaling(Estimator.Gamma, 1., x, xtest, Phi) + w = Estimator.weight_scaling(Estimator.Gamma, 1.0, x, xtest, Phi) D = torch.diag(w) X = Phi(xtest) - theta = torch.linalg.inv((X.T@D@X) + lam * torch.eye(embedding.get_m()))@X.T@D@ytest - prediction = (X@theta).detach() - - if i%64 == 0: - p = ax1.plot(xtest[i], - prediction[i],'o',ms = 10) - - ax1.plot(xtest[np.max([0,i-offset]):np.min([i+offset,n])], - prediction[np.max([0,i-offset]):np.min([i+offset,n])], color = p[0].get_color()) - ax2.plot(xtest, w, color = p[0].get_color()) + theta = ( + torch.linalg.inv((X.T @ D @ X) + lam * torch.eye(embedding.get_m())) + @ X.T + @ D + @ ytest + ) + prediction = (X @ theta).detach() + + if i % 64 == 0: + p = ax1.plot(xtest[i], prediction[i], "o", ms=10) + + ax1.plot( + xtest[np.max([0, i - offset]) : np.min([i + offset, n])], + prediction[np.max([0, i - offset]) : np.min([i + offset, n])], + color=p[0].get_color(), + ) + ax2.plot(xtest, w, color=p[0].get_color()) mu = Estimator.mean(xtest) - ax1.plot(xtest, mu, 'b') - ax1.plot(xtest,ytest,'k--') - ax1.plot(Estimator.x,Estimator.y,'ko') + ax1.plot(xtest, mu, "b") + ax1.plot(xtest, ytest, "k--") + ax1.plot(Estimator.x, Estimator.y, "ko") - plt.show() \ No newline at end of file + plt.show() diff --git a/stpy/continuous_processes/dirichlet_mixture.py b/stpy/continuous_processes/dirichlet_mixture.py index 2839b61..cb6a6ff 100755 --- a/stpy/continuous_processes/dirichlet_mixture.py +++ b/stpy/continuous_processes/dirichlet_mixture.py @@ -7,117 +7,127 @@ class DirichletMixture(Estimator): - def __init__(self, processes): - self.processes = processes - self.k = len(self.processes) - self.s = processes[0].s - - def fit_GP(self, X, y, xtest=None, N=200): - self.X = X - self.y = y - n = X.shape[0] - self.fit = True - return True - - def custom_kernel(self, a, b, alpha): - kernel = alpha[0] * self.processes[0].kernel(a, b) - for j in np.arange(1, self.k, 1): - kernel = kernel + alpha[j] * self.processes[j].kernel(a, b) - return kernel - - def mean_var(self, xtest, N=100): - - self.K_mix = np.zeros(shape=(n, n)) - - mu = xtest * 0 - s = xtest * 0 - - samples = np.zeros(shape=(N, xtest.shape[0], xtest.shape[1])) - - for i in range(N): - alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. / float(self.k)), 1)[0] - print("Dirichlet sample:", alpha) - kernel = lambda a, b: self.custom_kernel(a, b, alpha) - GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) - GP_mix.fit_GP(self.X, self.y) - samples[i, :, :] = GP_mix.sample(xtest) - - mu = np.mean(samples, axis=0) - s = np.var(samples, axis=0) - s = np.sqrt(s) - - return (mu, s) - - def sample(self, xtest, size=1, with_mask=False): - # sample a GP - if self.fit == True: - alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. / float(self.k)), 1)[0] - kernel = lambda a, b: self.custom_kernel(a, b, alpha) - GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) - GP_mix.fit_GP(self.X, self.y) - return GP_mix.sample(xtest) - else: - alpha = np.random.dirichlet(np.ones(shape=(self.k)) * (1. 
/ float(self.k)), 1)[0] - kernel = lambda a, b: self.custom_kernel(a, b, alpha) - GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) - return GP_mix.sample(xtest) + def __init__(self, processes): + self.processes = processes + self.k = len(self.processes) + self.s = processes[0].s + + def fit_GP(self, X, y, xtest=None, N=200): + self.X = X + self.y = y + n = X.shape[0] + self.fit = True + return True + + def custom_kernel(self, a, b, alpha): + kernel = alpha[0] * self.processes[0].kernel(a, b) + for j in np.arange(1, self.k, 1): + kernel = kernel + alpha[j] * self.processes[j].kernel(a, b) + return kernel + + def mean_var(self, xtest, N=100): + + self.K_mix = np.zeros(shape=(n, n)) + + mu = xtest * 0 + s = xtest * 0 + + samples = np.zeros(shape=(N, xtest.shape[0], xtest.shape[1])) + + for i in range(N): + alpha = np.random.dirichlet( + np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1 + )[0] + print("Dirichlet sample:", alpha) + kernel = lambda a, b: self.custom_kernel(a, b, alpha) + GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) + GP_mix.fit_GP(self.X, self.y) + samples[i, :, :] = GP_mix.sample(xtest) + + mu = np.mean(samples, axis=0) + s = np.var(samples, axis=0) + s = np.sqrt(s) + + return (mu, s) + + def sample(self, xtest, size=1, with_mask=False): + # sample a GP + if self.fit == True: + alpha = np.random.dirichlet( + np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1 + )[0] + kernel = lambda a, b: self.custom_kernel(a, b, alpha) + GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) + GP_mix.fit_GP(self.X, self.y) + return GP_mix.sample(xtest) + else: + alpha = np.random.dirichlet( + np.ones(shape=(self.k)) * (1.0 / float(self.k)), 1 + )[0] + kernel = lambda a, b: self.custom_kernel(a, b, alpha) + GP_mix = GaussianProcess(kernel="custom", custom=kernel, s=self.s) + return GP_mix.sample(xtest) if __name__ == "__main__": - # domain size - L_infinity_ball = 5 - # dimension - d = 1 - # error variance - s = 0.001 - # grid density - n = 1024 - # number of intial points - N = 15 - # smoothness - gamma = 2 - - # model - GP1 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.5, diameter=L_infinity_ball) - GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1) - GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1., nu=2, gamma=1.1) - GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.) 
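DirichletMixture.custom_kernel above forms a convex combination of base kernel matrices with Dirichlet-distributed weights. A toy sketch of that combination with two made-up base kernels:

    import numpy as np

    k_rbf = lambda a, b, gamma=0.5: np.exp(-((a - b.T) ** 2) / (2 * gamma ** 2))
    k_lin = lambda a, b: a @ b.T

    x = np.linspace(0, 1, 10).reshape(-1, 1)
    alpha = np.random.dirichlet(np.ones(2) * (1.0 / 2.0))   # mixture weights, sum to 1
    K_mix = alpha[0] * k_rbf(x, x) + alpha[1] * k_lin(x, x)
    print(alpha, K_mix.shape)                                # (10, 10) kernel matrix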
- - # data - # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball) - GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2., kappa=1) - # GPTrue = GaussianProcess(kernel = "modified_matern", s =s, kappa = 1., nu = 2, gamma = 1.1) - - # test environment - TT = code.test_problems.test_functions.test_function() - (d, xtest, x, gamma) = TT.sample_ss_bounds(N, n, d=d, L_infinity_ball=L_infinity_ball) - f = lambda x: TT.sample_ss(x, sigma=0, GP=GPTrue) - - # targets - y = f(x) - GPs = [GP1, GP2, GP3, GP4] - Mix = DirichletMixture(GPs) - for j in range(N): - plt.figure(1) - plt.clf() - X = x[0:j + 1, :].reshape(-1, 1) - y = f(X) - Mix.fit_GP(X, y) - (mu, var) = Mix.mean_var(xtest) - samples = Mix.sample(xtest, size=5) - plt.plot(xtest, samples, '--', linewidth=3, alpha=0.1) - plt.plot(xtest, mu, 'k', linewidth=4) - plt.plot(xtest, mu, 'k', linewidth=4) - plt.fill_between(xtest.flat, (mu - var).flat, (mu + var).flat, color="#dddddd") - plt.plot(X, y, 'ro', markersize=10) - plt.plot(xtest, f(xtest), 'g', linewidth=4) - plt.draw() - # plt.figure(2) - # plt.clf() - # plt.title("Probability of Category") - # plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs))*0.5) - # plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30) - # plt.subplots_adjust(bottom=0.35) - # plt.draw() - plt.pause(4) + # domain size + L_infinity_ball = 5 + # dimension + d = 1 + # error variance + s = 0.001 + # grid density + n = 1024 + # number of intial points + N = 15 + # smoothness + gamma = 2 + + # model + GP1 = GaussianProcess( + kernel="squared_exponential", s=s, gamma=1.5, diameter=L_infinity_ball + ) + GP2 = GaussianProcess(kernel="squared_exponential", s=s, gamma=1.1) + GP3 = GaussianProcess(kernel="modified_matern", s=s, kappa=1.0, nu=2, gamma=1.1) + GP4 = GaussianProcess(kernel="linear", s=s, kappa=1.0) + + # data + # GPTrue = GaussianProcess(kernel="linear", s=0, kappa=1., diameter=L_infinity_ball) + GPTrue = GaussianProcess(kernel="squared_exponential", s=s, gamma=2.0, kappa=1) + # GPTrue = GaussianProcess(kernel = "modified_matern", s =s, kappa = 1., nu = 2, gamma = 1.1) + + # test environment + TT = code.test_problems.test_functions.test_function() + (d, xtest, x, gamma) = TT.sample_ss_bounds( + N, n, d=d, L_infinity_ball=L_infinity_ball + ) + f = lambda x: TT.sample_ss(x, sigma=0, GP=GPTrue) + + # targets + y = f(x) + GPs = [GP1, GP2, GP3, GP4] + Mix = DirichletMixture(GPs) + for j in range(N): + plt.figure(1) + plt.clf() + X = x[0 : j + 1, :].reshape(-1, 1) + y = f(X) + Mix.fit_GP(X, y) + (mu, var) = Mix.mean_var(xtest) + samples = Mix.sample(xtest, size=5) + plt.plot(xtest, samples, "--", linewidth=3, alpha=0.1) + plt.plot(xtest, mu, "k", linewidth=4) + plt.plot(xtest, mu, "k", linewidth=4) + plt.fill_between(xtest.flat, (mu - var).flat, (mu + var).flat, color="#dddddd") + plt.plot(X, y, "ro", markersize=10) + plt.plot(xtest, f(xtest), "g", linewidth=4) + plt.draw() + # plt.figure(2) + # plt.clf() + # plt.title("Probability of Category") + # plt.bar(np.arange(len(GPs)), Mix.weights, np.ones(len(GPs))*0.5) + # plt.xticks(np.arange(len(GPs)), [GP.description() for GP in GPs], rotation=30) + # plt.subplots_adjust(bottom=0.35) + # plt.draw() + plt.pause(4) diff --git a/stpy/continuous_processes/fourier_fea.py b/stpy/continuous_processes/fourier_fea.py index b635c1c..c39333c 100755 --- a/stpy/continuous_processes/fourier_fea.py +++ b/stpy/continuous_processes/fourier_fea.py @@ -5,500 +5,662 @@ class GaussianProcessFF(KernelizedFeatures): - 
''' - Random Fourier Features for Gaussian Kernel - ''' - - def __init__(self, project=None, gamma=0.1, s=0.001, approx="rff", m=100, d=1, diameter=1.0, verbose=True, - groups=None, - bounds=None, scale=1.0, kernel="squared_exponential", nu=0.5, kappa=1.0): - - self.gamma = gamma - self.s = s - self.x = None - self.K = 0 - self.mu = 0.0 - self.fit = False - self.beta = None - self.m = m - self.project = None - self.nu = nu - self.lam = 1. - if groups is None: - self.no_groups = 1 - else: - self.no_groups = len(groups) - - self.approx = approx - self.d = d - self.bounds = bounds - self.groups = groups - self.diameter = diameter - self.admits_first_order = True - self.verbose = verbose - self.kernel = kernel - self.scale = scale - self.m_old = None - self.kappa = kappa - self.heuristic_variance = False - if self.groups is None: - self.embedding_map = self.sample_embedding(self.d, self.m, self.gamma) - self.m = self.embedding_map.m - else: - self.no_groups = float(len(self.groups)) - self.embedding_map = self.sample_embedding_group() - - def resample(self): - self.embedding_map = self.sample_embedding_group() - - def description(self): - """ - Description of GP in text - :return: string with description - """ - return "Fourier Features object\n" + "Appprox: " + self.approx + "\n" + "Bandwidth: " + str( - self.gamma) + "\n" + "Groups:" + str(self.groups) + "\n noise: " + str(self.s) - - def get_gamma(self, t): - if self.kernel == "squared_exponential" and self.groups is None: - return (np.log(t)) ** self.d - elif self.kernel == "linear": - return 10 * self.m - elif self.kernel == "squared_exponential" and self.groups is not None: - return len(self.groups) * (np.log(t)) - elif self.kernel == "matern": - return (np.log(t)) ** self.d - elif self.kernel == "modified_matern": - return (np.log(t)) ** self.d - - def sample_embedding_group(self): - # self.m is a vector of ms - # self.gamma is a vector of gammas - embedding_map = [] - - self.d_effective = int(self.d / self.no_groups) - - if self.groups is not None: - self.d_group_sizes = [len(group) for group in self.groups] - self.d_effective = max(self.d_group_sizes) - - if np.sum(np.array(list(self.gamma.size()))) > 1: - self.gamma = self.gamma - else: - self.gamma = torch.ones(int(self.no_groups), dtype=torch.float64) * self.gamma - - for i, group in enumerate(self.groups): - embedding_map.append(self.sample_embedding(len(group), self.m[i], self.gamma[i])) - self.m[i] = embedding_map[i].m - return embedding_map - - def sample_embedding(self, d_effective, m, gamma): - if self.m_old is not None: - self.m = self.m_old - - if self.approx == "quad": - embedding_map = QuadratureEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "rff": - embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "rff2": - embedding_map = RFFEmbedding(biased=True, gamma=gamma, nu=self.nu, m=m, d=d_effective, - diameter=self.diameter, groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "halton": - embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "hermite": - embedding_map = HermiteEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, 
approx=self.approx) - elif self.approx == "trapezoidal": - embedding_map = TrapezoidalEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "ccff": - embedding_map = ClenshawCurtisEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "matern_secific": - embedding_map = MaternEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "quad_periodic": - embedding_map = QuadPeriodicEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - elif self.approx == "kl": - embedding_map = KLEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, - diameter=self.diameter, groups=None, kernel=self.kernel, approx=self.approx) - elif self.approx == "orf": - embedding_map = RFFEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - else: - embedding_map = QuadratureEmbedding(gamma=gamma, nu=self.nu, m=m, d=d_effective, diameter=self.diameter, - groups=None, - kernel=self.kernel, approx=self.approx) - self.m_old = self.m - - return embedding_map - - def embed(self, x): - if self.groups is None: - - if self.project is not None: - x = self.project(x) - - return self.embedding_map.embed(x) - - else: - return self.embed_whole(x) - - def embed_group(self, x, group): - return self.embedding_map[group].embed(x) / (np.sqrt(self.no_groups)) - - def embed_whole(self, x): - if self.project is not None: - x = self.project(x) - - if self.groups == None: - return self.embed(x) - else: - n = x.size()[0] - M = torch.zeros(int(torch.sum(self.m)), n, dtype=torch.float64) - for i, group in enumerate(self.groups): - embeding = self.embed_group(x[:, group], i) - index = int(torch.sum(self.m[0:i], dim=0)) - index_next = int(torch.sum(self.m[0:i + 1], dim=0)) - M[index:index_next, :] = torch.t(embeding) - return torch.t(M) - - def get_basis_size(self): - return self.m - - def set_basis_size(self, m): - self.m_old = None - self.m = m - - def right_kernel(self): - embeding = self.embed(self.x) - Z = self.linear_kernel(embeding, embeding) - K = (Z + self.s * self.s * torch.eye(self.n, dtype=torch.float64)) - return K - - def fit_gp(self, x, y, iterative=False): - ''' - Function to Fit GP - ''' - - self.x = x - self.y = y - self.n = list(self.x.size())[0] - self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel - - if self.groups == None: - embeding = self.embed(x) - self.Z_ = self.linear_kernel(torch.t(embeding), torch.t(embeding)) - self.K = (self.Z_ + self.s * self.s * torch.eye(self.m, dtype=torch.float64)) - self.Q = torch.t(embeding) - - else: ## additive models - M = torch.t(self.embed_whole(x)) - self.Q = M - self.Z_ = self.linear_kernel(M, M) - self.K = self.kappa * self.Z_ + self.s * self.s * torch.eye(int(torch.sum(self.m)), dtype=torch.float64) - - self.fit = True - - return None - - def log_marginal_likelihood_self(self): - return self.log_marginal_likelihood(self.gamma, torch.eye(self.d, dtype=torch.float64), self.kappa) - - def log_marginal_likelihood(self, gamma, Rot, kappa, kernel="default"): - """ - Calculated the log marginal likelihood - :param kernel: custom kenrel object - :return: float - """ - # func = 
self.kernel_object.get_kernel_function() - - self.x = torch.mm(self.x, Rot) - L = torch.torch.cholesky(self.K, upper=False) - logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) - - Q = self.embed_whole(self.x) - rhs = torch.mm(torch.t(Q), self.y) - alpha, _ = torch.solve(rhs, self.K) - logprob = -0.5 * (torch.mm(torch.t(self.y), self.y) - torch.mm(torch.t(rhs), - alpha)) / self.s ** 2 + logdet # - 0.5*self.n*np.log(2*np.pi) - logprob = -logprob - - return logprob - - def mean_std(self, xtest, reuse=False): - ''' - Calculate mean and variance for GP at xtest points - ''' - # compute the mean at our test points. - - if self.project is not None: - self.project(xtest) - - if self.groups == None: - embeding = self.embed(xtest) - Q = self.embed(self.x) - else: - self.Z_ = self.K - self.s * self.s * torch.eye(int(torch.sum(self.m)), dtype=torch.float64) - embeding = self.embed_whole(xtest) - Q = self.embed_whole(self.x) - - theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K) - ymean = torch.mm(embeding, theta_mean) - - temp = torch.t(torch.solve(torch.t(embeding), self.K)[0]) - diagonal = self.s * self.s * torch.einsum('ij,ji->i', (temp, torch.t(embeding))).view(-1, 1) - yvar = torch.sqrt(diagonal) - - return (ymean, yvar) - - # def posterior_inf(self, xtest, tol=10e-5, max_int=20000): - # alpha = np.random.randn(self.n, 1) - # err = 10. - # F = 10.0 - # counter = 0 - # embeding = self.embed(self.x) - # K = (linear_kernel(embeding.T, embeding.T) + self.s * self.s * np.eye(self.n)) - # Kinv = np.linalg.pinv(K) - # - # q = [] - # for index in range(self.n): - # q.append(self.embed(self.x[index, :].reshape(1, -1))) - # q = np.array(q) - # - # while (counter < max_int and err / F > tol): - # # first find which index gives maximum - # # print (K.shape) - # index = np.argmax(np.abs(K.dot(alpha) - self.y)) - # sign = np.sign(K.dot(alpha)[index] - self.y[index]) - # - # k = linear_kernel(embeding.T, q[index, :, :].T).reshape(-1, 1) - # # print ("k: ", k.shape) - # oldalpha = alpha - # alpha = alpha - 1. / np.sqrt(counter + 1) * Kinv.dot(self.s * K.dot(alpha) + sign * k) - # err = np.linalg.norm(oldalpha - alpha) - # counter += 1 - # F = np.max(np.abs(K.dot(alpha) - self.y)) + self.s * alpha.T.dot(K.dot(alpha))[0][0] - # - # y_inf = linear_kernel(self.embed(self.x).T, self.embed(xtest).T).T.dot(alpha) - # return y_inf - - def sample_theta(self, size=1): - if self.groups is None: - basis = self.m - else: - basis = int(int(torch.sum(self.m))) - zeros = torch.zeros(basis, size, dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.) - - if self.fit == True: - # random vector - Z = torch.pinverse(self.K) - self.L = torch.cholesky(Z, upper=False) - theta_mean = torch.mm(Z, torch.mm(self.Q, self.y)) - theta = torch.mm(self.s * self.L, random_vector) - theta = theta + theta_mean - else: - theta_mean = 0 - Z = (1. 
+ self.s * self.s) * torch.eye(basis, dtype=torch.float64) - L = torch.cholesky(Z, upper=False) - theta = torch.mm(L, random_vector) + theta_mean - return theta - - def sample(self, xtest, size=1): - ''' - Sample functions from Gaussian Process - ''' - theta = self.sample_theta(size=size) - if self.groups == None: - f = torch.mm(self.embed(xtest), theta) - else: - f = torch.zeros(xtest.size()[0], size, dtype=torch.float64) - for i, group in enumerate(self.groups): - embeding = self.embed_group(xtest[:, group], i) - index = int(torch.sum(self.m[0:i], dim=0)) - index_next = int(torch.sum(self.m[0:i + 1], dim=0)) - f += torch.mm(embeding, theta[index:index_next, :]) - return f - - def sample_and_max(self, xtest, size=1): - ''' - Sample functions from Gaussian Process and take Maximum - ''' - f = self.sample(xtest, size=size) - - index = np.argmax(f.detach(), axis=0) - return (xtest[index, :], f[index, :]) - - def ucb_optimize(self, beta, multistart=25): - - mean = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[0][0][0] - sigma = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[1][0][0] - - fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x)) - # grad = lambda x: -complex_step_derivative(fun,1e-10,x.reshape(1,-1)) - - mybounds = self.bounds - results = [] - from scipy.optimize import minimize - - for i in range(multistart): - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - results.append([solution, -fun(solution)]) - - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - - return (solution, -fun(solution)) - - def special_embed_eval(self, x, theta): - f = 0 - x = torch.from_numpy(x) - # print (x) - for i, group in enumerate(self.groups): - embeding = self.embed_group(x[group].view(-1, len(group)), i) - index = torch.sum(self.m[0:i], dim=0) - index_next = torch.sum(self.m[0:i + 1], dim=0) - f += torch.mm(embeding, theta[int(index):int(index_next), :]) - return f.numpy() - - def special_embed_eval_grad(self, x, theta): - ff = lambda x: self.special_embed_eval(x.flatten(), theta) - grad = complex_step_derivative(ff, 1e-10, x.reshape(-1, 1).T).flatten() - return grad - - def get_lambdas_additive(self, theta): - fun = lambda x: -self.special_embed_eval(x, theta) - grad = lambda x: -self.special_embed_eval_grad(x, theta) - return [fun, grad] - - def get_lambdas(self, theta): - - # complex step differentiation - fun = lambda x: -(torch.mm(self.embed(torch.from_numpy(x).view(1, self.d)), theta).numpy()).flatten() - grad = lambda x: -complex_step_derivative(fun, 1e-10, x.reshape(self.d, 1).T).flatten() - return [fun, grad] - - def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0): - ''' - Sample functions from Gaussian Process and take Maximum using - first order maximization - ''' - - # sample linear approximating - theta = self.sample_theta() - from scipy.optimize import minimize - - # get bounds - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for i in range(self.d)]) - else: - mybounds = self.bounds - - fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy() - - results = [] - for j in range(multistart): - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - if minimizer == 
"L-BFGS-B": - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - elif minimizer == "ProjGD": - res = projected_gradient_descent(fun, grad, x0, mybounds, tol=0.001, - nu=1. / (self.m * np.max(np.abs(theta)))) - solution = res.x - elif minimizer == "coordinate-wise": - - solution = np.random.randn(self.d) - for i in range(self.d): - if verbose > 0: - print("Dimension: ", i) - fun_cw = lambda x: lambda_coordinate(fun, x0, i, x) - ranges = [slice(mybounds[i][0], mybounds[i][1], 1. / float(grid))] - out = scipy.optimize.brute(fun_cw, ranges, finish=None) - solution[i] = out - if verbose > 0: - print("Soln:", out.T) - elif minimizer == "CD_cw": - raise BaseException("Not implemented yet") - else: - raise AssertionError("Wrong optimizer selected.") - - results.append([solution, -fun(solution)]) - - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - - return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) + """ + Random Fourier Features for Gaussian Kernel + """ + + def __init__( + self, + project=None, + gamma=0.1, + s=0.001, + approx="rff", + m=100, + d=1, + diameter=1.0, + verbose=True, + groups=None, + bounds=None, + scale=1.0, + kernel="squared_exponential", + nu=0.5, + kappa=1.0, + ): + + self.gamma = gamma + self.s = s + self.x = None + self.K = 0 + self.mu = 0.0 + self.fit = False + self.beta = None + self.m = m + self.project = None + self.nu = nu + self.lam = 1.0 + if groups is None: + self.no_groups = 1 + else: + self.no_groups = len(groups) + + self.approx = approx + self.d = d + self.bounds = bounds + self.groups = groups + self.diameter = diameter + self.admits_first_order = True + self.verbose = verbose + self.kernel = kernel + self.scale = scale + self.m_old = None + self.kappa = kappa + self.heuristic_variance = False + if self.groups is None: + self.embedding_map = self.sample_embedding(self.d, self.m, self.gamma) + self.m = self.embedding_map.m + else: + self.no_groups = float(len(self.groups)) + self.embedding_map = self.sample_embedding_group() + + def resample(self): + self.embedding_map = self.sample_embedding_group() + + def description(self): + """ + Description of GP in text + :return: string with description + """ + return ( + "Fourier Features object\n" + + "Appprox: " + + self.approx + + "\n" + + "Bandwidth: " + + str(self.gamma) + + "\n" + + "Groups:" + + str(self.groups) + + "\n noise: " + + str(self.s) + ) + + def get_gamma(self, t): + if self.kernel == "squared_exponential" and self.groups is None: + return (np.log(t)) ** self.d + elif self.kernel == "linear": + return 10 * self.m + elif self.kernel == "squared_exponential" and self.groups is not None: + return len(self.groups) * (np.log(t)) + elif self.kernel == "matern": + return (np.log(t)) ** self.d + elif self.kernel == "modified_matern": + return (np.log(t)) ** self.d + + def sample_embedding_group(self): + # self.m is a vector of ms + # self.gamma is a vector of gammas + embedding_map = [] + + self.d_effective = int(self.d / self.no_groups) + + if self.groups is not None: + self.d_group_sizes = [len(group) for group in self.groups] + self.d_effective = max(self.d_group_sizes) + + if np.sum(np.array(list(self.gamma.size()))) > 1: + self.gamma = self.gamma + else: + self.gamma = ( + torch.ones(int(self.no_groups), dtype=torch.float64) * self.gamma + ) + + for i, group in enumerate(self.groups): + embedding_map.append( + self.sample_embedding(len(group), self.m[i], self.gamma[i]) + ) + self.m[i] = 
embedding_map[i].m + return embedding_map + + def sample_embedding(self, d_effective, m, gamma): + if self.m_old is not None: + self.m = self.m_old + + if self.approx == "quad": + embedding_map = QuadratureEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "rff": + embedding_map = RFFEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "rff2": + embedding_map = RFFEmbedding( + biased=True, + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "halton": + embedding_map = RFFEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "hermite": + embedding_map = HermiteEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "trapezoidal": + embedding_map = TrapezoidalEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "ccff": + embedding_map = ClenshawCurtisEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "matern_secific": + embedding_map = MaternEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "quad_periodic": + embedding_map = QuadPeriodicEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "kl": + embedding_map = KLEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + elif self.approx == "orf": + embedding_map = RFFEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + else: + embedding_map = QuadratureEmbedding( + gamma=gamma, + nu=self.nu, + m=m, + d=d_effective, + diameter=self.diameter, + groups=None, + kernel=self.kernel, + approx=self.approx, + ) + self.m_old = self.m + + return embedding_map + + def embed(self, x): + if self.groups is None: + + if self.project is not None: + x = self.project(x) + + return self.embedding_map.embed(x) + + else: + return self.embed_whole(x) + + def embed_group(self, x, group): + return self.embedding_map[group].embed(x) / (np.sqrt(self.no_groups)) + + def embed_whole(self, x): + if self.project is not None: + x = self.project(x) + + if self.groups == None: + return self.embed(x) + else: + n = x.size()[0] + M = torch.zeros(int(torch.sum(self.m)), n, dtype=torch.float64) + for i, group in enumerate(self.groups): + embeding = self.embed_group(x[:, group], i) + index = int(torch.sum(self.m[0:i], dim=0)) + index_next = int(torch.sum(self.m[0 : i + 1], dim=0)) + M[index:index_next, :] = torch.t(embeding) + return torch.t(M) + + def get_basis_size(self): + return self.m + + def 
set_basis_size(self, m): + self.m_old = None + self.m = m + + def right_kernel(self): + embeding = self.embed(self.x) + Z = self.linear_kernel(embeding, embeding) + K = Z + self.s * self.s * torch.eye(self.n, dtype=torch.float64) + return K + + def fit_gp(self, x, y, iterative=False): + """ + Function to Fit GP + """ + + self.x = x + self.y = y + self.n = list(self.x.size())[0] + self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel + + if self.groups == None: + embeding = self.embed(x) + self.Z_ = self.linear_kernel(torch.t(embeding), torch.t(embeding)) + self.K = self.Z_ + self.s * self.s * torch.eye(self.m, dtype=torch.float64) + self.Q = torch.t(embeding) + + else: ## additive models + M = torch.t(self.embed_whole(x)) + self.Q = M + self.Z_ = self.linear_kernel(M, M) + self.K = self.kappa * self.Z_ + self.s * self.s * torch.eye( + int(torch.sum(self.m)), dtype=torch.float64 + ) + + self.fit = True + + return None + + def log_marginal_likelihood_self(self): + return self.log_marginal_likelihood( + self.gamma, torch.eye(self.d, dtype=torch.float64), self.kappa + ) + + def log_marginal_likelihood(self, gamma, Rot, kappa, kernel="default"): + """ + Calculated the log marginal likelihood + :param kernel: custom kenrel object + :return: float + """ + # func = self.kernel_object.get_kernel_function() + + self.x = torch.mm(self.x, Rot) + L = torch.torch.cholesky(self.K, upper=False) + logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) + + Q = self.embed_whole(self.x) + rhs = torch.mm(torch.t(Q), self.y) + alpha, _ = torch.solve(rhs, self.K) + logprob = ( + -0.5 + * (torch.mm(torch.t(self.y), self.y) - torch.mm(torch.t(rhs), alpha)) + / self.s**2 + + logdet + ) # - 0.5*self.n*np.log(2*np.pi) + logprob = -logprob + + return logprob + + def mean_std(self, xtest, reuse=False): + """ + Calculate mean and variance for GP at xtest points + """ + # compute the mean at our test points. + + if self.project is not None: + self.project(xtest) + + if self.groups == None: + embeding = self.embed(xtest) + Q = self.embed(self.x) + else: + self.Z_ = self.K - self.s * self.s * torch.eye( + int(torch.sum(self.m)), dtype=torch.float64 + ) + embeding = self.embed_whole(xtest) + Q = self.embed_whole(self.x) + + theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K) + ymean = torch.mm(embeding, theta_mean) + + temp = torch.t(torch.solve(torch.t(embeding), self.K)[0]) + diagonal = ( + self.s + * self.s + * torch.einsum("ij,ji->i", (temp, torch.t(embeding))).view(-1, 1) + ) + yvar = torch.sqrt(diagonal) + + return (ymean, yvar) + + # def posterior_inf(self, xtest, tol=10e-5, max_int=20000): + # alpha = np.random.randn(self.n, 1) + # err = 10. + # F = 10.0 + # counter = 0 + # embeding = self.embed(self.x) + # K = (linear_kernel(embeding.T, embeding.T) + self.s * self.s * np.eye(self.n)) + # Kinv = np.linalg.pinv(K) + # + # q = [] + # for index in range(self.n): + # q.append(self.embed(self.x[index, :].reshape(1, -1))) + # q = np.array(q) + # + # while (counter < max_int and err / F > tol): + # # first find which index gives maximum + # # print (K.shape) + # index = np.argmax(np.abs(K.dot(alpha) - self.y)) + # sign = np.sign(K.dot(alpha)[index] - self.y[index]) + # + # k = linear_kernel(embeding.T, q[index, :, :].T).reshape(-1, 1) + # # print ("k: ", k.shape) + # oldalpha = alpha + # alpha = alpha - 1. 
/ np.sqrt(counter + 1) * Kinv.dot(self.s * K.dot(alpha) + sign * k) + # err = np.linalg.norm(oldalpha - alpha) + # counter += 1 + # F = np.max(np.abs(K.dot(alpha) - self.y)) + self.s * alpha.T.dot(K.dot(alpha))[0][0] + # + # y_inf = linear_kernel(self.embed(self.x).T, self.embed(xtest).T).T.dot(alpha) + # return y_inf + + def sample_theta(self, size=1): + if self.groups is None: + basis = self.m + else: + basis = int(int(torch.sum(self.m))) + zeros = torch.zeros(basis, size, dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0) + + if self.fit == True: + # random vector + Z = torch.pinverse(self.K) + self.L = torch.cholesky(Z, upper=False) + theta_mean = torch.mm(Z, torch.mm(self.Q, self.y)) + theta = torch.mm(self.s * self.L, random_vector) + theta = theta + theta_mean + else: + theta_mean = 0 + Z = (1.0 + self.s * self.s) * torch.eye(basis, dtype=torch.float64) + L = torch.cholesky(Z, upper=False) + theta = torch.mm(L, random_vector) + theta_mean + return theta + + def sample(self, xtest, size=1): + """ + Sample functions from Gaussian Process + """ + theta = self.sample_theta(size=size) + if self.groups == None: + f = torch.mm(self.embed(xtest), theta) + else: + f = torch.zeros(xtest.size()[0], size, dtype=torch.float64) + for i, group in enumerate(self.groups): + embeding = self.embed_group(xtest[:, group], i) + index = int(torch.sum(self.m[0:i], dim=0)) + index_next = int(torch.sum(self.m[0 : i + 1], dim=0)) + f += torch.mm(embeding, theta[index:index_next, :]) + return f + + def sample_and_max(self, xtest, size=1): + """ + Sample functions from Gaussian Process and take Maximum + """ + f = self.sample(xtest, size=size) + + index = np.argmax(f.detach(), axis=0) + return (xtest[index, :], f[index, :]) + + def ucb_optimize(self, beta, multistart=25): + + mean = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[0][0][0] + sigma = lambda x: self.mean_std(torch.from_numpy(x).view(1, -1))[1][0][0] + + fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x)) + # grad = lambda x: -complex_step_derivative(fun,1e-10,x.reshape(1,-1)) + + mybounds = self.bounds + results = [] + from scipy.optimize import minimize + + for i in range(multistart): + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + results.append([solution, -fun(solution)]) + + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + + return (solution, -fun(solution)) + + def special_embed_eval(self, x, theta): + f = 0 + x = torch.from_numpy(x) + # print (x) + for i, group in enumerate(self.groups): + embeding = self.embed_group(x[group].view(-1, len(group)), i) + index = torch.sum(self.m[0:i], dim=0) + index_next = torch.sum(self.m[0 : i + 1], dim=0) + f += torch.mm(embeding, theta[int(index) : int(index_next), :]) + return f.numpy() + + def special_embed_eval_grad(self, x, theta): + ff = lambda x: self.special_embed_eval(x.flatten(), theta) + grad = complex_step_derivative(ff, 1e-10, x.reshape(-1, 1).T).flatten() + return grad + + def get_lambdas_additive(self, theta): + fun = lambda x: -self.special_embed_eval(x, theta) + grad = lambda x: -self.special_embed_eval_grad(x, theta) + return [fun, grad] + + def get_lambdas(self, theta): + + # complex step differentiation + fun = lambda x: -( + torch.mm(self.embed(torch.from_numpy(x).view(1, self.d)), theta).numpy() + ).flatten() + grad = lambda x: 
-complex_step_derivative( + fun, 1e-10, x.reshape(self.d, 1).T + ).flatten() + return [fun, grad] + + def sample_and_optimize( + self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0 + ): + """ + Sample functions from Gaussian Process and take Maximum using + first order maximization + """ + + # sample linear approximating + theta = self.sample_theta() + from scipy.optimize import minimize + + # get bounds + if self.bounds == None: + mybounds = tuple([(-self.diameter, self.diameter) for i in range(self.d)]) + else: + mybounds = self.bounds + + fun = lambda x: -torch.mm( + torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1))) + ).numpy() + + results = [] + for j in range(multistart): + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + if minimizer == "L-BFGS-B": + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + elif minimizer == "ProjGD": + res = projected_gradient_descent( + fun, + grad, + x0, + mybounds, + tol=0.001, + nu=1.0 / (self.m * np.max(np.abs(theta))), + ) + solution = res.x + elif minimizer == "coordinate-wise": + + solution = np.random.randn(self.d) + for i in range(self.d): + if verbose > 0: + print("Dimension: ", i) + fun_cw = lambda x: lambda_coordinate(fun, x0, i, x) + ranges = [slice(mybounds[i][0], mybounds[i][1], 1.0 / float(grid))] + out = scipy.optimize.brute(fun_cw, ranges, finish=None) + solution[i] = out + if verbose > 0: + print("Soln:", out.T) + elif minimizer == "CD_cw": + raise BaseException("Not implemented yet") + else: + raise AssertionError("Wrong optimizer selected.") + + results.append([solution, -fun(solution)]) + + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + + return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) if __name__ == "__main__": - # domain size - L_infinity_ball = 1 - # dimension - d = 2 - # error variance - s = 0.001 - # grid density - n = 50 - # number of intial points - N = 200 - # smoothness - gamma = torch.from_numpy(np.array([0.4, 0.4])) - # test problem - - xtest = torch.from_numpy(interval(n, d)) - x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) - - f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) - # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1) - - f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1., - out=None) * s - # targets - y = f(x) - - # GP model with squared exponential - m = torch.from_numpy(np.array([100, 100])) - - groups = [[0], [1]] - GP = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma, groups=groups, approx="hermite") - # GP2 = GaussianProcess(kernel="ard", s=s, d=d, gamma=gamma, groups=None) - - # fit GP - GP.fit_gp(x, y) - # GP2.fit_gp(x,y) - - GP.optimize_params("rots", 10, optimizer="pymanopt") - - print("Log probability:", GP.log_marginal_likelihood_self()) - # print ("Log probability:", GP2.log_marginal_likelihood_self() ) - - GP.visualize(xtest, f_true=f_no_noise) + # domain size + L_infinity_ball = 1 + # dimension + d = 2 + # error variance + s = 0.001 + # grid density + n = 50 + # number of intial points + N = 200 + # smoothness + gamma = torch.from_numpy(np.array([0.4, 0.4])) + # test problem + + xtest = torch.from_numpy(interval(n, d)) + x = torch.from_numpy( + np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)) + 
) + + f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) + # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1) + + f = ( + lambda q: f_no_noise(q) + + torch.normal( + mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None + ) + * s + ) + # targets + y = f(x) + + # GP model with squared exponential + m = torch.from_numpy(np.array([100, 100])) + + groups = [[0], [1]] + GP = GaussianProcessFF( + kernel="squared_exponential", + s=s, + m=m, + d=d, + gamma=gamma, + groups=groups, + approx="hermite", + ) + # GP2 = GaussianProcess(kernel="ard", s=s, d=d, gamma=gamma, groups=None) + + # fit GP + GP.fit_gp(x, y) + # GP2.fit_gp(x,y) + + GP.optimize_params("rots", 10, optimizer="pymanopt") + + print("Log probability:", GP.log_marginal_likelihood_self()) + # print ("Log probability:", GP2.log_marginal_likelihood_self() ) + + GP.visualize(xtest, f_true=f_no_noise) # GP2.visualize(xtest, f_true=f_no_noise) diff --git a/stpy/continuous_processes/ga_process.py b/stpy/continuous_processes/ga_process.py index c0a1537..6317b08 100755 --- a/stpy/continuous_processes/ga_process.py +++ b/stpy/continuous_processes/ga_process.py @@ -5,208 +5,234 @@ class GammaContProcess(Estimator): - def __init__(self, gamma=1, s=0.001, kappa=1., kernel="squared_exponential", diameter=1.0, - groups=None, bounds=None, nu=2, safe=False, kernel_custom=None, d=1): - """ - - :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel - :param s: level of noise - :param kernel: choose from a list - :param diameter: diameter of the set (deprecated) - :param groups: additive groups - :param bounds: bounds for the continuous optimization - :param v: parameter for matern kernel - """ - - ## GP properties - self.s = s - self.d = d - self.x = None - self.K = np.array([1.0]) - self.mu = 0.0 - self.safe = False - self.fit = False - self.diameter = diameter - self.bounds = bounds - self.admits_first_order = False - self.back_prop = True - - ## kernel hyperparameters - if kernel_custom is not None: - self.kernel_object = kernel_custom - self.kernel = kernel_custom.kernel - else: - self.kernel_object = KernelFunction(kernel_name=kernel, gamma=gamma, nu=nu, groups=groups, kappa=kappa) - self.kernel = self.kernel_object.kernel - - self.gamma = gamma - self.v = nu - self.groups = groups - self.kappa = kappa - self.custom = kernel_custom - self.optkernel = kernel - - def description(self): - """ - Description of GP in text - :return: string with description - """ - return self.kernel_object.description() + "\n noise: " + str(self.s) - - def get_gamma(self, t): - """ - ?? - :param t: - :return: - """ - if self.optkernel == "squared_exponential" and self.groups is None: - return (np.log(t)) ** self.d - elif self.optkernel == "linear": - return 10 * self.d - elif self.optkernel == "squared_exponential" and self.groups is not None: - return len(self.groups) * (np.log(t)) - elif self.optkernel == "matern": - return (np.log(t)) ** self.d - elif self.optkernel == "modified_matern": - return (np.log(t)) ** self.d - - def make_safe(self, x): - """ - Make the input dataset numerically stable by removing duplicates? 
- :param x: - :return: - """ - self.epsilon = 0.001 - # remove vectors that are very close to each other - return x - - def fit_gp(self, x, y, iterative=False, extrapoint=False): - """ - Fits the Gaussian process, possible update is via iterative inverse - :param x: data x - :param y: values y - :param iterative: iterative inverse, where only last point of x is used - :param extrapoint: iterative inverse must be allowed, x is the only addition - :return: - """ - # first fit - if (self.fit == False or iterative == False): - if self.safe == True: - x = self.make_safe(x) - - self.x = x - self.y = y - try: - self.n, self.d = list(x.size()) - except: - self.n, self.d = x.shape - self.K = self.kernel(x, x) + self.s * self.s * torch.eye(self.n, dtype=torch.float64) - - self.fit = True - else: - # iterative inverse - if (iterative == True): - if extrapoint == False: - last_point = self.x[-1, :].view(1, -1) - else: - last_point = x - old_K = self.K - old_Kinv = self.Kinv - else: - pass - - return None - - def beta(self, delta=1e-12, norm=1): - beta_value = self.s * norm + torch.sqrt( - 2 * torch.log(1. / delta + torch.log(torch.det(self.K) / self.s ** self.n))) - return beta_value - - def execute(self, xtest): - if self.fit == True: - K_star = self.kernel(self.x, xtest) - else: - K_star = None - K_star_star = self.kernel(xtest, xtest) - return (K_star, K_star_star) - - # @check_numpy(1) - def mean_var(self, xtest, full=False): - """ - Return posterior mean and variance as tuple - :param xtest: grid, numpy array (2D) - :param full: Instead of just poinwise variance, full covariance can be outputed (bool) - :return: (tensor,tensor) - """ - - (K_star, K_star_star) = self.execute(xtest) - - if self.fit == False: - if full == False: - - x = torch.sum(xtest, dim=1) - first = torch.diag(K_star_star).view(-1, 1) - variance = first - yvar = torch.sqrt(variance) - else: - first = K_star_star - yvar = first - - return (0 * x.view(-1, 1), yvar) - - if self.back_prop == False: - decomp = torch.btrifact(self.K.unsqueeze(0)) - A = torch.btrisolve(self.y.unsqueeze(0), *decomp)[0, :, :] - self.B = torch.t(torch.btrisolve(torch.t(K_star).unsqueeze(0), *decomp)[0, :, :]) - else: - A, _ = torch.gesv(self.y, self.K) - self.B = torch.t(torch.gesv(torch.t(K_star), self.K)[0]) - - ymean = torch.mm(K_star, A) - - if full == False: - first = torch.diag(K_star_star).view(-1, 1) - second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1) - variance = first - second - yvar = torch.sqrt(variance) - else: - first = K_star_star - second = torch.mm(self.B, torch.t(K_star)) - yvar = first - second - - return (ymean, yvar) - - def sample(self, xtest, size=1): - """ - Samples Path from GP, return a numpy array evaluated over grid - :param xtest: grid - :param size: number of samples - :return: numpy array - """ - nn = list(xtest.size())[0] - - if self.fit == True: - (ymean, yvar) = self.mean_var(xtest, full=True) - Cov = yvar + self.s * self.s * torch.eye(nn, dtype=torch.float64) - L = torch.cholesky(Cov, upper=False) - random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.) - f = ymean + torch.abs(torch.mm(L, random_vector)) - else: - (K_star, K_star_star) = self.execute(xtest) - L = torch.cholesky(K_star_star + (10e-10 + self.s * self.s) * torch.eye(nn, dtype=torch.float64), - upper=False) - random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.) 
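The mean_var and sample methods of GammaContProcess shown here follow the textbook Gaussian-process posterior with a Cholesky draw: with K = k(X, X) + s^2 I, the posterior mean is k(x*, X) K^{-1} y and the posterior covariance is k(x*, x*) - k(x*, X) K^{-1} k(X, x*). A minimal self-contained sketch of the same pipeline in plain PyTorch follows; the rbf helper, data, and lengthscale are illustrative, not stpy API.

# Sketch: GP posterior mean/covariance and one posterior sample via Cholesky.
import torch

def rbf(a, b, gamma=0.5):
    # squared-exponential kernel on (n, d) inputs
    return torch.exp(-torch.cdist(a, b) ** 2 / (2.0 * gamma ** 2))

s = 0.001
X = torch.linspace(-1, 1, 10, dtype=torch.float64).view(-1, 1)
y = torch.sin(4 * X)
xtest = torch.linspace(-1, 1, 50, dtype=torch.float64).view(-1, 1)

K = rbf(X, X) + s * s * torch.eye(X.shape[0], dtype=torch.float64)
K_star = rbf(xtest, X)
mean = K_star @ torch.linalg.solve(K, y)                              # posterior mean
cov = rbf(xtest, xtest) - K_star @ torch.linalg.solve(K, K_star.T)    # posterior covariance
L = torch.linalg.cholesky(cov + 1e-6 * torch.eye(xtest.shape[0], dtype=torch.float64))
f = mean + L @ torch.randn(xtest.shape[0], 1, dtype=torch.float64)    # one posterior draw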
- f = self.mu + torch.mm(L, random_vector) - return f - - def sample_and_max(self, xtest, size=1): - """ - Samples Path from GP and takes argmax - :param xtest: grid - :param size: number of samples - :return: (argmax, max) - """ - f = self.sample(xtest, size=size) - self.temp = f - val, index = torch.max(f, dim=0) - return (xtest[index, :], val) + def __init__( + self, + gamma=1, + s=0.001, + kappa=1.0, + kernel="squared_exponential", + diameter=1.0, + groups=None, + bounds=None, + nu=2, + safe=False, + kernel_custom=None, + d=1, + ): + """ + + :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel + :param s: level of noise + :param kernel: choose from a list + :param diameter: diameter of the set (deprecated) + :param groups: additive groups + :param bounds: bounds for the continuous optimization + :param v: parameter for matern kernel + """ + + ## GP properties + self.s = s + self.d = d + self.x = None + self.K = np.array([1.0]) + self.mu = 0.0 + self.safe = False + self.fit = False + self.diameter = diameter + self.bounds = bounds + self.admits_first_order = False + self.back_prop = True + + ## kernel hyperparameters + if kernel_custom is not None: + self.kernel_object = kernel_custom + self.kernel = kernel_custom.kernel + else: + self.kernel_object = KernelFunction( + kernel_name=kernel, gamma=gamma, nu=nu, groups=groups, kappa=kappa + ) + self.kernel = self.kernel_object.kernel + + self.gamma = gamma + self.v = nu + self.groups = groups + self.kappa = kappa + self.custom = kernel_custom + self.optkernel = kernel + + def description(self): + """ + Description of GP in text + :return: string with description + """ + return self.kernel_object.description() + "\n noise: " + str(self.s) + + def get_gamma(self, t): + """ + ?? + :param t: + :return: + """ + if self.optkernel == "squared_exponential" and self.groups is None: + return (np.log(t)) ** self.d + elif self.optkernel == "linear": + return 10 * self.d + elif self.optkernel == "squared_exponential" and self.groups is not None: + return len(self.groups) * (np.log(t)) + elif self.optkernel == "matern": + return (np.log(t)) ** self.d + elif self.optkernel == "modified_matern": + return (np.log(t)) ** self.d + + def make_safe(self, x): + """ + Make the input dataset numerically stable by removing duplicates? 
+ :param x: + :return: + """ + self.epsilon = 0.001 + # remove vectors that are very close to each other + return x + + def fit_gp(self, x, y, iterative=False, extrapoint=False): + """ + Fits the Gaussian process, possible update is via iterative inverse + :param x: data x + :param y: values y + :param iterative: iterative inverse, where only last point of x is used + :param extrapoint: iterative inverse must be allowed, x is the only addition + :return: + """ + # first fit + if self.fit == False or iterative == False: + if self.safe == True: + x = self.make_safe(x) + + self.x = x + self.y = y + try: + self.n, self.d = list(x.size()) + except: + self.n, self.d = x.shape + self.K = self.kernel(x, x) + self.s * self.s * torch.eye( + self.n, dtype=torch.float64 + ) + + self.fit = True + else: + # iterative inverse + if iterative == True: + if extrapoint == False: + last_point = self.x[-1, :].view(1, -1) + else: + last_point = x + old_K = self.K + old_Kinv = self.Kinv + else: + pass + + return None + + def beta(self, delta=1e-12, norm=1): + beta_value = self.s * norm + torch.sqrt( + 2 * torch.log(1.0 / delta + torch.log(torch.det(self.K) / self.s**self.n)) + ) + return beta_value + + def execute(self, xtest): + if self.fit == True: + K_star = self.kernel(self.x, xtest) + else: + K_star = None + K_star_star = self.kernel(xtest, xtest) + return (K_star, K_star_star) + + # @check_numpy(1) + def mean_var(self, xtest, full=False): + """ + Return posterior mean and variance as tuple + :param xtest: grid, numpy array (2D) + :param full: Instead of just poinwise variance, full covariance can be outputed (bool) + :return: (tensor,tensor) + """ + + (K_star, K_star_star) = self.execute(xtest) + + if self.fit == False: + if full == False: + + x = torch.sum(xtest, dim=1) + first = torch.diag(K_star_star).view(-1, 1) + variance = first + yvar = torch.sqrt(variance) + else: + first = K_star_star + yvar = first + + return (0 * x.view(-1, 1), yvar) + + if self.back_prop == False: + decomp = torch.btrifact(self.K.unsqueeze(0)) + A = torch.btrisolve(self.y.unsqueeze(0), *decomp)[0, :, :] + self.B = torch.t( + torch.btrisolve(torch.t(K_star).unsqueeze(0), *decomp)[0, :, :] + ) + else: + A, _ = torch.gesv(self.y, self.K) + self.B = torch.t(torch.gesv(torch.t(K_star), self.K)[0]) + + ymean = torch.mm(K_star, A) + + if full == False: + first = torch.diag(K_star_star).view(-1, 1) + second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1) + variance = first - second + yvar = torch.sqrt(variance) + else: + first = K_star_star + second = torch.mm(self.B, torch.t(K_star)) + yvar = first - second + + return (ymean, yvar) + + def sample(self, xtest, size=1): + """ + Samples Path from GP, return a numpy array evaluated over grid + :param xtest: grid + :param size: number of samples + :return: numpy array + """ + nn = list(xtest.size())[0] + + if self.fit == True: + (ymean, yvar) = self.mean_var(xtest, full=True) + Cov = yvar + self.s * self.s * torch.eye(nn, dtype=torch.float64) + L = torch.cholesky(Cov, upper=False) + random_vector = torch.normal( + mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0 + ) + f = ymean + torch.abs(torch.mm(L, random_vector)) + else: + (K_star, K_star_star) = self.execute(xtest) + L = torch.cholesky( + K_star_star + + (10e-10 + self.s * self.s) * torch.eye(nn, dtype=torch.float64), + upper=False, + ) + random_vector = torch.normal( + mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0 + ) + f = self.mu + torch.mm(L, random_vector) + return f + + def 
sample_and_max(self, xtest, size=1): + """ + Samples Path from GP and takes argmax + :param xtest: grid + :param size: number of samples + :return: (argmax, max) + """ + f = self.sample(xtest, size=size) + self.temp = f + val, index = torch.max(f, dim=0) + return (xtest[index, :], val) diff --git a/stpy/continuous_processes/gauss_procc.py b/stpy/continuous_processes/gauss_procc.py index b0af70e..88989fc 100755 --- a/stpy/continuous_processes/gauss_procc.py +++ b/stpy/continuous_processes/gauss_procc.py @@ -15,1132 +15,1524 @@ class GaussianProcess(Estimator): - def __init__(self, gamma=1, s=0.001, kappa=1., kernel_name="squared_exponential", diameter=1.0, - groups=None, bounds=None, nu=1.5, kernel=None, d=1, power=2, lam=1., loss = 'squared', huber_delta = 1.35, - hyper = 'classical', B = 1., svr_eps = 0.1): - """ - - :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel - :param s: level of noise - :param kernel: choose from a list - :param diameter: diameter of the set (deprecated) - :param groups: additive groups - :param bounds: bounds for the continuous optimization - :param v: parameter for matern kernel - """ - - ## GP properties - self.s = s - self.d = d - self.x = None - self.K = np.array([1.0]) - self.mu = 0.0 - self.lam = lam - self.total_bound = B - self.prob = 0.5 - self.svr_eps = svr_eps - self.safe = False - self.fitted = False - self.diameter = diameter - self.bounds = bounds - self.admits_first_order = False - self.back_prop = True - self.loss = loss - self.huber_delta = huber_delta - self.hyper = hyper - self.prepared_log_marginal = False - self.warm_start_solution = None - self.max_size = 10000 - ## kernel hyperparameters - if kernel is not None: - self.kernel_object = kernel - self.kernel = kernel.kernel - self.d = kernel.d - else: - self.kernel_object = KernelFunction(kernel_name=kernel_name, gamma=gamma, nu=nu, groups=groups, kappa=kappa, - power=power, d=d) - self.kernel = self.kernel_object.kernel - - self.gamma = gamma - self.v = nu - self.groups = groups - self.kappa = kappa - self.custom = kernel - self.optkernel = kernel_name - - def residuals(self,x,y): - res = (self.mean(x) - y) - return res - - def description(self): - """ - Description of GP in text - :return: string with description - """ - return self.kernel_object.description() + "\nlambda=" + str(self.s) - - def embed(self, x): - return self.kernel_object.embed(x) - - def get_basis_size(self): - return self.kernel_object.get_basis_size() - - def make_safe(self, x): - """ - Make the input dataset numerically stable by removing duplicates? 
- :param x: - :return: - """ - self.epsilon = 0.001 - # remove vectors that are very close to each other - return x - - def add_data_point(self, x, y, Sigma = None): - - if self.x is not None: - self.x = torch.cat((self.x, x), dim=0) - self.y = torch.cat((self.y, y), dim=0) - if Sigma is None: - self.Sigma = torch.block_diag(self.Sigma, torch.eye(x.size()[0],dtype = torch.double) * self.s) - else: - self.x = x - self.y = y - self.Sigma = Sigma - self.fit_gp(self.x, self.y, Sigma = self.Sigma) - - def fit(self, x=None, y=None): - if x is not None: - self.fit_gp(x,y) - else: - self.fit_gp(self.x,self.y) - - def lcb(self, xtest): - """ - Lower confidence bound - :return: - """ - mu, s = self.mean_std(xtest) - return mu - 2 * s - - def ucb(self, xtest): - """ - Upper confidence bound - :param xtest: - :return: - """ - mu, s = self.mean_std(xtest) - return mu + 2*s - - def fit_gp(self, x, y, Sigma = None, iterative=False, extrapoint=False): - """ - Fits the Gaussian process, possible update is via iterative inverse - :param x: data x - :param y: values y - :param iterative: iterative inverse, where only last point of x is used - :param extrapoint: iterative inverse must be allowed, x is the only addition - :return: - """ - # first fit - try: - self.n, self.d = list(x.size()) - except: - self.n, self.d = x.shape - - if Sigma is None: - self.Sigma = (self.s) * torch.eye(self.n, dtype=torch.float64) - else: - self.Sigma = Sigma - - if (self.fitted == False or iterative == False): - - if self.safe == True: - x = self.make_safe(x) - - self.x = x - self.y = y - self.K = self.kernel(x, x) + self.Sigma.T @ self.Sigma - self.fitted = True - else: - # iterative inverse - if (iterative == True): - if extrapoint == False: - last_point = self.x[-1, :].view(1, -1) - else: - last_point = x - old_K = self.K - old_Kinv = self.Kinv - else: - pass - self.mean_std(x) - return None - - def norm(self): - if self.fitted: - val = torch.sqrt(self.A.T @ self.kernel(self.x, self.x) @ self.A) - return val - else: - return None - - def beta(self, delta=1e-3, norm=1): - """ - return concentration parameter given the current estimates - - :param delta: failure probability - :param norm: norm assumption - :return: - """ - beta_value = self.s * norm + \ - torch.sqrt(2 * torch.log(1. 
/ delta + torch.log(torch.det(self.K) / self.s ** self.n))) - return beta_value - - def execute(self, xtest): - """ - Calculates the covariance between data and xtest - :param xtest: - :return: - """ - if self.fitted == True: - K_star = self.kernel(self.x, xtest) - else: - K_star = None - K_star_star = self.kernel(xtest, xtest) - return (K_star, K_star_star) - - def _huber_fit(self, K_star, newK = None): - alpha = cp.Variable(self.n) - self.jitter = 10e-5 - if newK is None: - K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64) - else: - K = newK.detach() - K = cp.atoms.affine.wraps.psd_wrap(K) - objective = cp.Minimize(cp.sum(cp.huber((K @ alpha - self.y.view(-1).numpy())/self.s,M = self.huber_delta)) + self.lam * cp.quad_form(alpha, K)) - prob = cp.Problem(objective) - prob.solve(solver = cp.MOSEK, enforce_dpp = False) - if K_star is not None: - return K_star@torch.from_numpy(alpha.value).view(-1,1) - else: - return torch.from_numpy(alpha.value).view(-1,1) - - def _svr_fit(self, K_star, newK = None): - alpha = cp.Variable(self.n) - self.jitter = 10e-5 - if newK is None: - K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64) - else: - K = newK.detach() - - K = cp.atoms.affine.wraps.psd_wrap(K) - objective = cp.Minimize(self.lam * cp.quad_form(alpha, K)) - constraints = [cp.abs(K @ alpha - self.y.view(-1).numpy()) <= self.svr_eps ] - prob = cp.Problem(objective, constraints) - prob.solve(solver = cp.MOSEK, enforce_dpp = False) - if K_star is not None: - return K_star@torch.from_numpy(alpha.value).view(-1,1) - else: - return torch.from_numpy(alpha.value).view(-1,1) - - - def _unif_fit(self, K_star, newK = None): - alpha = cp.Variable((self.n,1)) - self.jitter = 10e-5 - if newK is None: - K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64) - else: - K = newK.detach() - - K = cp.atoms.affine.wraps.psd_wrap(K) - con = 2*self.total_bound*self.prob/((1-self.prob)*np.sqrt(2*np.pi*self.s**2)) - objective = cp.Minimize(cp.sum(cp.logistic(cp.square( - (K @ alpha - self.y.view(-1, 1).numpy())/ (np.sqrt(2)*self.s)) + np.log(con) )) + self.lam * cp.quad_form(alpha, K)) - prob = cp.Problem(objective) - prob.solve(solver = cp.MOSEK, enforce_dpp = False) - if K_star is not None: - return K_star@torch.from_numpy(alpha.value).view(-1,1) - else: - return torch.from_numpy(alpha.value).view(-1,1) - - def _unif_fit_torch(self, K_star, newK = None, warm_start = None): - self.jitter = 10e-5 - if newK is None: - K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64) - else: - K = newK.detach() - - con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2)) - unif = lambda alpha: torch.sum(torch.log(torch.exp( ((K@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \ - + self.lam * alpha @ K@ alpha - if warm_start is None: - x_init = torch.zeros(size = (self.n,1)).view(-1).double() - else: - x_init = warm_start.view(-1) - - res = minimize_torch(unif, x_init, method='l-bfgs', tol=1e-3, disp=0, - options={'max_iter': 200, 'gtol': 1e-3}) - alpha = res.x - - if K_star is not None: - return K_star @ alpha.view(-1, 1) - else: - return alpha.view(-1, 1) - - def _huber_fit_torch(self, K_star, newK = None): - self.jitter = 10e-5 - if newK is None: - K = self.kernel(self.x, self.x) + self.jitter * torch.eye(self.n, dtype=torch.float64) - else: - K = newK - L = torch.linalg.cholesky(K) - - huber = lambda beta: torch.nn.functional.huber_loss(L @ 
beta / self.s, self.y.view(-1) / self.s, - reduction='sum', - delta=self.huber_delta) + self.lam * beta @ beta - #x_init = torch.linalg.solve(L.T@L+torch.eye(self.n).double()*self.s**2*self.lam, self.y) - x_init = torch.zeros(size = (self.n,1)).view(-1).double() - res = minimize_torch(huber, x_init, method='l-bfgs', tol=1e-4, disp=0, - options={'max_iter': 10**3, 'gtol': 1e-4}) - alpha = torch.linalg.solve(L,res.x) - if K_star is not None: - return K_star @ alpha.view(-1, 1) - else: - return alpha.view(-1,1) - - def mean_std(self, xtest, full=False, reuse=False): - if xtest.size()[0] 0: - mu[xtest.size()[0] - xtest.size()[0] % stepby:], std[ - xtest.size()[0] - xtest.size()[0] % stepby:] = self.mean_std_sub( - xtest[xtest.size()[0] - xtest.size()[0] % stepby:, :], reuse=True) - - return mu, std - - def mean_std_sub(self, xtest, full=False, reuse=False): - """ - Return posterior mean and variance as tuple - :param xtest: grid, numpy array (2D) - :param full: Instead of just poinwise variance, full covariance can be outputed (bool) - :return: (tensor,tensor) - """ - if full: - (K_star, K_star_star) = self.execute(xtest) - else: - K_star = self.kernel(self.x, xtest) - diag_K_star_star = torch.hstack([self.kernel(xtest[i,:].view(1,-1),xtest[i,:].view(1,-1)).view(1) for i in range(xtest.size()[0])]) - - if self.fitted == False: - # the process is not fitted - - if full == False: - x = torch.sum(xtest, dim=1) - #first = torch.diag(K_star_star).view(-1, 1) - first = diag_K_star_star.view(-1,1) - variance = first - yvar = torch.sqrt(variance) - else: - x = torch.sum(xtest, dim=1) - first = K_star_star - yvar = first - - return (0 * x.view(-1, 1), yvar) - - else: - - if self.back_prop == False: - if reuse == False: - #self.decomp = torch.lu(self.K.unsqueeze(0)) - self.LU, self.pivot = torch.linalg.lu_factor(self.K.unsqueeze(0)) - #self.A = torch.lu_solve(self.y.unsqueeze(0), *self.decomp)[0, :, :] - self.A = torch.linalg.lu_solve(self.LU, self.pivot, self.y.unsqueeze(0))[0,:,:] - self.B = torch.t(torch.linalg.lu_solve(self.LU, self.pivot ,torch.t(K_star).unsqueeze(0))[0, :, :]) - else: - if reuse == False: - self.A = torch.linalg.lstsq(self.K, self.y)[0] - #self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star))) - self.B = torch.t(torch.linalg.lstsq(self.K, torch.t(K_star))[0]) - - if self.loss == "squared": - ymean = torch.mm(K_star, self.A) - elif self.loss == "huber": - ymean = self._huber_fit(K_star) - elif self.loss == "svr": - ymean = self._svr_fit(K_star) - elif self.loss == "unif" or self.loss == "unif_new": - ymean = self._unif_fit_torch(K_star) - else: - raise AssertionError("Loss function not implemented.") - - if full == False: - first = diag_K_star_star.view(-1,1) - second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1) - variance = first - second - yvar = torch.sqrt(variance) - else: - first = K_star_star - second = torch.mm(self.B, torch.t(K_star)) - yvar = first - second - - return (ymean, yvar) - - def mean(self, xtest): - """ - Calculates the mean prediction over a specific input space - :param xtest: input - :return: - """ - K_star = self.kernel(self.x, xtest) - - if self.loss == "squared": - ymean = torch.mm(K_star, self.A) - elif self.loss == "huber": - ymean = self._huber_fit(K_star) - else: - raise AssertionError("Loss function not implemented.") - - return ymean - - def gradient_mean_var(self, point, hessian=True): - """ - Can calculate gradient at single point atm. 
- - :param point: - :return: - """ - - # mean - point.requires_grad_(True) - mu = self.mean_std(point)[0] - nabla_mu = grad(mu, point, create_graph=True)[0][0] - - if hessian == True: - # variance - H = self.kernel_object.get_2_der(point) - C = self.kernel_object.get_1_der(point, self.x) - - V = H - torch.t(C) @ self.K @ C - - return [nabla_mu, V] - else: - return nabla_mu - - def mean_gradient_hessian(self, xtest, hessian=False): - hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) - xtest.requires_grad_(True) - # xtest.retain_grad() - mu = self.mean_std(xtest)[0] - # mu.backward(retain_graph=True) - - # nabla_mu = xtest.grad - nabla_mu = grad(mu, xtest, create_graph=True)[0][0] - - if hessian == False: - return nabla_mu - else: - for i in range(self.d): - hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0] - return [nabla_mu, hessian_mu] - - def sample(self, xtest, size=1, jitter=10e-8): - """ - Samples Path from GP, return a numpy array evaluated over grid - :param xtest: grid - :param size: number of samples - :return: numpy array - """ - nn = list(xtest.size())[0] - - if self.fitted == True: - (ymean, yvar) = self.mean_std(xtest, full=True) - Cov = yvar + 10e-10 * torch.eye(nn, dtype=torch.float64) - L = torch.linalg.cholesky(Cov) - # L = torch.from_numpy(np.linalg.cholesky(Cov.numpy())) - random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.) - f = ymean + torch.mm(L, random_vector) - else: - (K_star, K_star_star) = self.execute(xtest) - L = torch.linalg.cholesky(K_star_star + jitter * torch.eye(nn, dtype=torch.float64)) - random_vector = torch.normal(mean=torch.zeros(nn, size, dtype=torch.float64), std=1.) - f = self.mu + torch.mm(L, random_vector) - return f - - def sample_and_max(self, xtest, size=1): - """ - Samples Path from GP and takes argmax - :param xtest: grid - :param size: number of samples - :return: (argmax, max) - """ - f = self.sample(xtest, size=size) - self.temp = f - val, index = torch.max(f, dim=0) - return (xtest[index, :], val) - - - def log_marginal(self, kernel, X, weight): - - if self.loss == "squared": - return self._log_marginal_squared(kernel, X, weight) - elif self.loss == "unif_new": - return self._log_marginal_unif(kernel, X, weight) - else: - return self._log_marginal_map(kernel, X, weight) - - def _log_marginal_unif(self,kernel,X,weight): - if not self.prepared_log_marginal: - self._prepare_log_marginal_unif() - - func = kernel.get_kernel() - self.jitter = 10e-4 - K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter - #print ("Kernel") - #print (K) - L = torch.linalg.cholesky(K) - self.L_unif.value = (L.data.numpy()) - - self.prob_unif.solve(solver=cp.MOSEK, enforce_dpp=False, warm_start=True) - - solution = torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double() - solution.data = torch.from_numpy(self.beta_unif.value) - con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2)) - - loglikelihood = lambda beta: torch.sum(torch.log(torch.exp( ((L@beta-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \ - + self.lam * beta.T @ beta - - H = hessian(loglikelihood)(solution) - logdet = - 0.5* torch.slogdet(H)[1] * weight - logprob = -0.5* loglikelihood(solution) + logdet - logprob = -logprob - return logprob - - def _prepare_log_marginal_unif(self): - - self.beta_unif = cp.Variable(self.n) - self.L_unif = cp.Parameter((self.n, self.n)) - - con = 2 * self.total_bound * self.prob / ((1 
- self.prob) * np.sqrt(2 * np.pi * self.s ** 2)) - #self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square( - # (self.K_unif @ self.alpha_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.quad_form( - # self.alpha_unif, self.L)) - self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square( - (self.L_unif @ self.beta_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.sum_squares(self.beta_unif)) - self.prob_unif = cp.Problem(self.objective_unif) - self.prepared_log_marginal = True - - def _prepare_log_marginal_huber(self): - beta = cp.Variable(self.n) - L = cp.Parameter((self.n, self.n)) - - objective = cp.Minimize(cp.sum( - cp.huber((L @ beta - self.y.view(-1).numpy()) / self.s, M=self.huber_delta)) + self.lam * cp.sum_squares( - beta)) - - prob = cp.Problem(objective) - cvxpylayer = CvxpyLayer(prob, parameters=[L], variables=[beta]) - self.prepared_log_marginal = True - print ("cvxpy-layer has been initialized.") - return cvxpylayer - - def _log_marginal_huber_cvxpy(self, kernel, X, weight): - func = kernel.get_kernel() - self.jitter = 10e-4 - L_tch = torch.linalg.cholesky(func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter) - - if not self.prepared_log_marginal: - self._cvxpylayer = self._prepare_log_marginal_huber() - solution = self._cvxpylayer(L_tch)[0] - - huber = lambda beta: torch.nn.functional.huber_loss(L_tch@beta/self.s,self.y.view(-1)/self.s,reduction='sum',delta = self.huber_delta) + self.lam * beta.T @ beta - H = torch.autograd.functional.hessian(huber, solution) - - logdet = - 0.5* torch.slogdet(H)[1]* weight - logprob = -0.5* huber(solution) +logdet - logprob = -logprob - return logprob - - - def _log_marginal_map(self, kernel, X, weight): - # this implementation uses Danskin theorem to simplify gradient propagation - func = kernel.get_kernel() - self.jitter = 10e-4 - K_tch =func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.jitter - - # solve - solution = torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double() - if self.warm_start_solution is None: - self.warm_start_solution = solution.clone() - - if self.loss == "huber": - alpha = self._huber_fit(None, newK = K_tch).detach() - loglikelihood = lambda alpha: torch.nn.functional.huber_loss(K_tch@alpha/self.s,self.y.view(-1)/self.s, - reduction='sum',delta = self.huber_delta) + self.lam * alpha.T @K_tch@ alpha - - solution.data = alpha.reshape(-1).data - self.warm_start_solution.data = solution.data - mask = torch.abs(K_tch @ alpha - self.y)/self.s self.svr_eps).int()) \ - + self.lam * alpha.T @K_tch@ alpha - - solution.data = alpha.reshape(-1).data - self.warm_start_solution.data = solution.data - H = torch.autograd.functional.hessian(loglikelihood, solution) - - elif self.loss == "unif": - alpha = self._unif_fit_torch(None, newK=K_tch).detach() - con = 2 * self.total_bound * self.prob / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s ** 2)) - - - loglikelihood = lambda alpha: torch.sum(torch.log(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) ) + 1 ) ) \ - + self.lam * alpha @ K_tch@ alpha - #v = lambda alpha : torch.sum(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) )) - solution.data = alpha.reshape(-1).data - self.warm_start_solution.data = solution.data - H = hessian(loglikelihood)(solution) - - logdet = - 0.5* torch.slogdet(H)[1] * weight - logprob = -0.5* loglikelihood(solution) + logdet - logprob = -logprob - 
return logprob - - - - def _log_marginal_squared(self, kernel, X, weight): - func = kernel.get_kernel() - K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s - logdet = -0.5 * torch.slogdet(K)[1] * weight - alpha = torch.linalg.solve(K, self.y) - logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet - logprob = -logprob - return logprob - - def optimize_params(self, type='bandwidth', restarts=10, regularizer=None, - maxiter=1000, mingradnorm=1e-4, verbose=False, optimizer="pymanopt", scale=1., weight=1., save = False, - save_name = 'model.np', init_func = None, bounds = None, parallel = False, cores = None): - - # Spectral norm regularizer - if regularizer is not None: - if regularizer[0] == "spectral_norm": - regularizer_func = lambda S: regularizer[1] * torch.norm(1/S[0], p='nuc') - elif regularizer[0] == 'lasso': - regularizer_func = lambda S: regularizer[1] * torch.norm(1/S[0], p=1) - else: - regularizer_func = None - else: - regularizer_func = None - - if type == "bandwidth": - params = {} - for key, dict2 in self.kernel_object.params_dict.items(): - if 'gamma' in dict2.keys(): - params[key] = {'gamma': (init_func, Euclidean(1), bounds)} - elif 'ard_gamma' in dict2.keys(): - params[key] = {'ard_gamma': (init_func, Euclidean(len(dict2['group'])), bounds)} - - elif type == "bandwidth+noise": - params = {} - init_func_noise = lambda x: self.s - for key, dict2 in self.kernel_object.params_dict.items(): - - if 'gamma' in dict2.keys(): - params[key] = {'gamma': (init_func, Euclidean(1), bounds)} - - elif 'ard_gamma' in dict2.keys(): - params[key] = {'ard_gamma': (init_func, Euclidean(len(dict2['group'])), bounds)} - - params['likelihood'] = {'sigma':(init_func_noise, Euclidean(1), None )} - - elif type == "rots": - params = {} - d = int(self.kernel_object.d) - for key, dict2 in self.kernel_object.params_dict.items(): - if 'rot' in dict2.keys(): - params[key] = {'rot': (None, Stiefel(d, d), None)} - elif type == "groups": - params = {} - optimizer = "discrete" - d = self.kernel_object.d - for key, dict2 in self.kernel_object.params_dict.items(): - if 'groups' in dict2.keys(): - params[key] = {'groups': (None, helper.generate_groups(d), None)} - pass - elif type == "covariance": - params = {} - d = int(self.kernel_object.d) - for key, dict2 in self.kernel_object.params_dict.items(): - if 'cov' in dict2.keys(): - params[key] = {'cov': (None, PSDFixedRank(d, d), None)} - else: - raise AttributeError("This quick-optimization is not implemented.") - - self.optimize_params_general(params=params, restarts=restarts, - optimizer=optimizer, regularizer_func=regularizer_func, - maxiter=maxiter, mingradnorm=mingradnorm, verbose=verbose, scale=scale, - weight=weight, save = save, save_name = save_name, parallel = parallel, cores = cores) - - def log_probability(self, xtest, sample): - from scipy.stats import multivariate_normal - mu, covar = self.mean_std(xtest, full=True) - p = np.log(multivariate_normal.pdf(sample.view(-1).numpy(), mean=mu.view(-1).numpy(), cov=covar.numpy())) - return p - - def volume_mean_cvxpy(self, xtest, weights=None, eps=10e-2, - tol=10e-14, max_weight=1, max_iter=1000, - verbose=False, scale=10e-4, slope=1., - bisections=10, B='auto', optimal_scale=None, - optimize_scale=False, relax='relu'): - - n = self.x.size()[0] - K = self.get_kernel() # (self.x, self.x) - Kinv = torch.pinverse(K + eps * torch.eye(K.size()[0]).double()).numpy() - if weights is None: - weights = torch.ones(self.x.size()[0]) / n - if B == 'auto': - alpha, _ = 
torch.lstsq(self.y, K) - beta = K @ alpha - B = beta.T @ Kinv @ beta - print("Auto:B", B) - - def fun(scale_arg): - beta = cp.Variable(n) - if relax == 'relu': - loss_fn_transformed = cp.sum(cp.pos(weights * slope * ( - cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_arg * cp.quad_form(beta, - Kinv) - elif relax == 'log': - loss_fn_transformed = cp.sum(cp.logistic(weights * slope * ( - cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_arg * cp.quad_form(beta, - Kinv) - - # loss_fn_transformed = cp.sum(weights*logit(slope*(cp.abs(beta - self.y.numpy().reshape(-1)) -eps))) + 0.5*scale_arg*cp.quad_form(beta, Kinv)- - - prob = cp.Problem(cp.Minimize(loss_fn_transformed)) - # prob.solve(solver=cp.MOSEK, feastol=tol, verbose=False) - prob.solve(solver=cp.MOSEK, verbose=False) - if verbose == True: - print("scale:", scale_arg, "cond:", np.linalg.cond(Kinv), "sub.", beta.value.T @ Kinv @ beta.value - B, - "B:", B) - return beta.value.T @ Kinv @ beta.value - B - - if optimize_scale: - return helper.bisection(fun, 0., max_weight, bisections) - - if optimal_scale is None: - scale_star = helper.bisection(fun, 0., max_weight, bisections) - else: - scale_star = optimal_scale - - beta = cp.Variable(n) - if relax == 'relu': - loss_fn_transformed = cp.sum(weights * cp.pos( - slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_star * cp.quad_form(beta, - Kinv) - elif relax == 'log': - loss_fn_transformed = cp.sum(weights * cp.logistic( - slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps))) + 0.5 * scale_star * cp.quad_form(beta, - Kinv) - prob = cp.Problem(cp.Minimize(loss_fn_transformed)) - # prob.solve(solver=cp.CVXOPT, feastol=tol, verbose=verbose) - prob.solve(solver=cp.MOSEK, verbose=verbose) - beta_torch = torch.from_numpy(beta.value).view(-1, 1) - alpha = torch.from_numpy(Kinv) @ beta_torch - ytest = self.kernel(self.x, xtest) @ alpha - return ytest - - def volume_mean(self, xtest, weights=None, eps=10e-2, tol=10e-6, max_iter=1000, verbose=False, eta_start=0.01, - eta_decrease=0.9, scale=1, slope=1., warm=True, relax='relu', norm=False, B='auto'): - self.scale = scale - self.relax = relax - - K = self.get_kernel() # (self.x, self.x) - Kinv = torch.pinverse(K) - - if weights is None: - weights = torch.ones(self.x.size()[0]) - else: - weights[weights < 10e-6] = 0. 
# * self.x.size()[0] - weights = weights.view(-1) - if warm == True: - # warm start with L2 fit - alpha, _ = torch.lstsq(self.y, K) - beta = K @ alpha - else: - beta = torch.randn(size=(self.n, 1)).double() # .requires_grad_(True)*0 - - # loss_fn_original = lambda alpha: torch.sum(torch.relu(torch.abs(K @ alpha - self.y) -eps)) + 0.5*self.s * alpha.T @ K @ alpha - if self.relax == "relu": - loss_fn_transformed = lambda beta: torch.sum( - torch.relu(torch.abs(beta - self.y) - eps)) + self.scale * 0.5 * self.s * beta.T @ Kinv @ beta - - elif self.relax == "tanh": - self.slope = slope - tanh = lambda x: (torch.tanh(self.slope * x) + 1) * 0.5 - loss_fn_transformed = lambda beta: torch.sum(weights * tanh(torch.abs(beta - self.y) - eps).view( - -1)) + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta - - elif self.relax == "elu": - self.slope = slope - elu = lambda x: torch.nn.elu(x, alpha=self.slope) - loss_fn_transformed = lambda beta: torch.sum( - elu(torch.abs(beta - self.y) - eps)) + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta - - elif self.relax == "relu": - return self.volume_mean_cvxpy(xtest, weights=weights, eps=eps, scale=scale, tol=tol) - else: - raise AssertionError("Unkown relaxation.") - - current_loss = 10e10 - eta = eta_start - for i in range(max_iter): - grad = self.s * (Kinv @ beta) - beta = self.proximal(beta, grad, eta, eps, weights) - past_loss = current_loss - current_loss = loss_fn_transformed(beta) - if current_loss > past_loss: - eta = eta * eta_decrease - elif np.abs(current_loss - past_loss) < tol: - break - - # print (i, beta.T) - if verbose == True: - print(i, loss_fn_transformed(beta), eta) - - print("final norm:", beta.T @ Kinv @ beta) - - # alpha = torch.inverse(self.K) @ beta - alpha = torch.pinverse(K) @ beta - # alpha = torch.lstsq(K,beta) - ytest = self.kernel(self.x, xtest) @ alpha - # max = torch.max(torch.abs(beta - self.y)) - if norm == True: - return beta.T @ Kinv @ beta - # yz = self.kernel(self.x, self.x) @ alpha - # approx_v = torch.sum(torch.relu(torch.abs(beta - self.y) -eps))/max - # approx_p = approx_v/self.n - # mask = (torch.abs(yz[:,0] - self.y[:,0])) > eps - # approx_p = float(torch.sum(mask))/float(self.n) - return ytest # ,approx_p - - def volume_mean_norm(self, xtest, weights=None, eps=10e-2, tol=10e-6, max_iter=1000, verbose=False, eta_start=0.01, - eta_decrease=0.9, scale=1, slope=1., warm=True, relax='relu', B='auto'): - K = self.kernel(self.x, self.x) - Kinv = torch.pinverse(K) - if B == 'auto': - alpha, _ = torch.lstsq(self.y, self.K) - beta = K @ alpha - B = beta.T @ Kinv @ beta - - func = lambda s: self.volume_mean(xtest, weights=weights, eps=eps, tol=tol, max_iter=max_iter, verbose=verbose, - eta_start=eta_start, - eta_decrease=eta_decrease, scale=s, slope=slope, warm=warm, relax=relax, - norm=True) - B - - s_star = stpy.optim.custom_optimizers.bisection(func, 0., 1000., 10) - - return self.volume_mean(xtest, weights=weights, eps=eps, tol=tol, max_iter=max_iter, verbose=verbose, - eta_start=eta_start, - eta_decrease=eta_decrease, scale=s_star, slope=slope, warm=warm, relax=relax, - norm=False) - - def proximal(self, beta, nabla, eta, eps, weights): - res = beta - for i in range(self.n): - from scipy.optimize import minimize - - b = float(beta[i, :]) - y = float(self.y[i, :]) - g = float(nabla[i, :]) - w = float(weights[i]) - # s = float(self.s) - - tanh = lambda x: (np.tanh(self.slope * x) + 1) * 0.5 - elu = lambda x: torch.elu(x, alpha=self.slope).numpy() - - if self.relax == "relu": - loss_reg = lambda x: w * np.maximum(0, 
np.abs(x - y) - eps) - elif self.relax == "tanh": - loss_reg = lambda x: w * tanh(np.abs(x - y) - eps) - elif self.relax == "elu": - loss_reg = lambda x: w * elu(np.abs(x - y) - eps) - else: - raise AssertionError("Unkown relaxation.") - - loss_scalar = lambda x: ((1 / (2. * eta)) * (x - (b - eta * g)) ** 2) + loss_reg(x) - - x0 = np.array([0.]) - # print (minimize(loss_scalar,x0,method ='nelder-mead').x) - res[i, :] = float(minimize(loss_scalar, x0, method='nelder-mead').x) - return res - - def get_lambdas(self, beta, mean=False): - """ - Gets lambda function to evaluate acquisiton function and its derivative - :param beta: beta in GP-UCB - :return: [lambda,lambda] - """ - mean = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[0][0][0] - sigma = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[1][0][0] - - if mean == True: - return [mean, sigma] - else: - fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x)) - grad = lambda x: -complex_step_derivative(fun, 1e-10, x.reshape(1, -1)) - - return [fun, grad] - - def get_kernel(self): - return self.K - - def ucb_optimize(self, beta, multistart=25, lcb=False): - """ - Optimizes UCB acquisiton function and return next point and its value as output - :param beta: beta from GP UCB - :param multistart: number of starts - :return: (next_point, value at next_point) - """ - - mean = lambda x: self.mean_std(x, reuse=True)[0][0][0] - sigma = lambda x: self.mean_std(x, reuse=True)[1][0][0] - - ucb = lambda x: torch.dot(torch.Tensor([1.0, np.sqrt(beta)]), torch.Tensor( - [self.mean_std(x, reuse=True)[0][0][0], self.mean_std(x, reuse=True)[1][0][0]])) - lcb = lambda x: torch.dot(torch.Tensor([1.0, np.sqrt(beta)]), torch.Tensor( - [self.mean_std(x, reuse=True)[0][0][0], -self.mean_std(x, reuse=True)[1][0][0]])) - - if lcb == False: - fun2 = lambda x: -ucb(torch.from_numpy(x).view(1, -1)).numpy() - else: - fun2 = lambda x: -lcb(torch.from_numpy(x).view(1, -1)).numpy() - fun = lambda x: -( - mean(torch.from_numpy(x).view(1, -1)) + np.sqrt(beta) * sigma(torch.from_numpy(x).view(1, -1))) - - self.back_prop = False - self.mean_std(self.x) - - mybounds = self.bounds - - results = [] - - from scipy.optimize import minimize - - for i in range(multistart): - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - res = minimize(fun2, x0, method="L-BFGS-B", jac=None, tol=0.000001, bounds=mybounds) - solution = res.x - results.append([solution, -fun(solution)]) - - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - - return (torch.from_numpy(solution), -fun(solution)) - - def isin(self, xnext): - self.epsilon = 0.001 - for v in self.x: - if torch.norm(v - xnext, p=2) < self.epsilon: - return True - - def sample_and_condition(self, x): - xprobe = x.view(1, -1) - fprobe = self.sample(xprobe) - if not self.isin(xprobe): - self.x = torch.cat((self.x, xprobe), dim=0) - self.y = torch.cat((self.y, fprobe), dim=0) - self.fit_gp(self.x, self.y) - return -fprobe - - def get_lambdas_TH(self): - fun = lambda x: self.sample_and_condition(x) - grad = None - return [fun, grad] - - def sample_iteratively_max(self, xtest, multistart=20, minimizer="coordinate-wise", grid=100): - """ - Samples Path from GP and takes the maximum iteratively - :param xtest: grid - :param size: number of samples - :return: numpy array - """ - # print ("Iterative:",multistart,minimizer,grid) - from scipy.optimize import minimize - # old stuff - xold = self.x - yold = self.y - - # with fixed 
grid - if xtest is not None: - # number of samples - nn = xtest.shape[0] - - f = torch.zeros(nn, dtype=torch.float64) - - for j in range(nn): - xprobe = xtest[j, :].view(1, -1) - (K_star, K_star_star) = self.execute(xprobe) - (ymean, yvar) = self.mean_std(xprobe) - L = torch.sqrt(K_star_star + self.s * self.s * torch.eye(1, dtype=torch.float64) - yvar) - fprobe = ymean + L * torch.randn(1, dtype=torch.float64) - # add x and fprobe to the dataset and redo the whole - f[j] = fprobe - if not self.isin(xprobe): - self.x = torch.cat((self.x, xprobe), dim=0) - self.y = torch.cat((self.y, fprobe), dim=0) - - self.fit_gp(self.x, self.y) - - val, index = torch.max(f, dim=0) - self.fit_gp(xold, yold) - return (xtest[index, :], f[index]) - - else: - # Iterative without grid - - # get bounds - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for i in range(self.d)]) - else: - mybounds = self.bounds - [fun, grad] = self.get_lambdas_TH() - - results = [] - for j in range(multistart): - - # print ("Multistart:",j) - x0 = torch.randn(self.d, dtype=torch.float64) - for i in range(self.d): - x0[i].uniform_(mybounds[i][0], mybounds[i][1]) - - # simple coordnate-wise optimization - if minimizer == "coordinate-wise": - solution = x0 - for i in range(self.d): - xtest = torch.from_numpy(np.tile(x0, (grid, 1))) - xtest[:, i] = torch.linspace(mybounds[i][0], mybounds[i][1], grid) - sample = self.sample(xtest) - - ## Add to the posterior - self.x = torch.cat((self.x, xtest), dim=0) - self.y = torch.cat((self.y, sample), dim=0) - - # argmax - val, index = torch.max(sample, dim=0) - out = xtest[index, :] - - # fit new GP - self.fit_gp(self.x, self.y) - solution[i] = out[0, i] - - elif minimizer == "L-BFGS-B": - solution = np.random.randn(self.d) - xmax = [b[1] for b in mybounds] - xmin = [b[0] for b in mybounds] - bounds = MyBounds(xmax=xmax, xmin=xmin) - func = lambda x: fun(torch.from_numpy(x)).numpy()[0][0] - res = scipy.optimize.basinhopping(func, solution, disp=False, niter=grid, accept_test=bounds) - solution = torch.from_numpy(res.x) - - else: - raise AssertionError("Wrong optimizer selected.") - - results.append(torch.cat((solution, -fun(solution)[0]))) - self.x = xold - self.y = yold - self.fit_gp(self.x, self.y) - - results = torch.stack(results) - val, index = torch.max(results[:, -1], dim=0) - solution = results[index, 0:self.d].view(1, self.d) - self.x = xold - self.y = yold - self.fit_gp(self.x, self.y) - - return (solution, -fun(solution)) + def __init__( + self, + gamma=1, + s=0.001, + kappa=1.0, + kernel_name="squared_exponential", + diameter=1.0, + groups=None, + bounds=None, + nu=1.5, + kernel=None, + d=1, + power=2, + lam=1.0, + loss="squared", + huber_delta=1.35, + hyper="classical", + B=1.0, + svr_eps=0.1, + ): + """ + + :param gamma: Smoothnes parameter for squared exponential, laplace and matern kernel + :param s: level of noise + :param kernel: choose from a list + :param diameter: diameter of the set (deprecated) + :param groups: additive groups + :param bounds: bounds for the continuous optimization + :param v: parameter for matern kernel + """ + + ## GP properties + self.s = s + self.d = d + self.x = None + self.K = np.array([1.0]) + self.mu = 0.0 + self.lam = lam + self.total_bound = B + self.prob = 0.5 + self.svr_eps = svr_eps + self.safe = False + self.fitted = False + self.diameter = diameter + self.bounds = bounds + self.admits_first_order = False + self.back_prop = True + self.loss = loss + self.huber_delta = huber_delta + self.hyper = hyper + 
self.prepared_log_marginal = False + self.warm_start_solution = None + self.max_size = 10000 + ## kernel hyperparameters + if kernel is not None: + self.kernel_object = kernel + self.kernel = kernel.kernel + self.d = kernel.d + else: + self.kernel_object = KernelFunction( + kernel_name=kernel_name, + gamma=gamma, + nu=nu, + groups=groups, + kappa=kappa, + power=power, + d=d, + ) + self.kernel = self.kernel_object.kernel + + self.gamma = gamma + self.v = nu + self.groups = groups + self.kappa = kappa + self.custom = kernel + self.optkernel = kernel_name + + def residuals(self, x, y): + res = self.mean(x) - y + return res + + def description(self): + """ + Description of GP in text + :return: string with description + """ + return self.kernel_object.description() + "\nlambda=" + str(self.s) + + def embed(self, x): + return self.kernel_object.embed(x) + + def get_basis_size(self): + return self.kernel_object.get_basis_size() + + def make_safe(self, x): + """ + Make the input dataset numerically stable by removing duplicates? + :param x: + :return: + """ + self.epsilon = 0.001 + # remove vectors that are very close to each other + return x + + def add_data_point(self, x, y, Sigma=None): + + if self.x is not None: + self.x = torch.cat((self.x, x), dim=0) + self.y = torch.cat((self.y, y), dim=0) + if Sigma is None: + self.Sigma = torch.block_diag( + self.Sigma, torch.eye(x.size()[0], dtype=torch.double) * self.s + ) + else: + self.x = x + self.y = y + self.Sigma = Sigma + self.fit_gp(self.x, self.y, Sigma=self.Sigma) + + def fit(self, x=None, y=None): + if x is not None: + self.fit_gp(x, y) + else: + self.fit_gp(self.x, self.y) + + def lcb(self, xtest): + """ + Lower confidence bound + :return: + """ + mu, s = self.mean_std(xtest) + return mu - 2 * s + + def ucb(self, xtest): + """ + Upper confidence bound + :param xtest: + :return: + """ + mu, s = self.mean_std(xtest) + return mu + 2 * s + + def fit_gp(self, x, y, Sigma=None, iterative=False, extrapoint=False): + """ + Fits the Gaussian process, possible update is via iterative inverse + :param x: data x + :param y: values y + :param iterative: iterative inverse, where only last point of x is used + :param extrapoint: iterative inverse must be allowed, x is the only addition + :return: + """ + # first fit + try: + self.n, self.d = list(x.size()) + except: + self.n, self.d = x.shape + + if Sigma is None: + self.Sigma = (self.s) * torch.eye(self.n, dtype=torch.float64) + else: + self.Sigma = Sigma + + if self.fitted == False or iterative == False: + + if self.safe == True: + x = self.make_safe(x) + + self.x = x + self.y = y + self.K = self.kernel(x, x) + self.Sigma.T @ self.Sigma + self.fitted = True + else: + # iterative inverse + if iterative == True: + if extrapoint == False: + last_point = self.x[-1, :].view(1, -1) + else: + last_point = x + old_K = self.K + old_Kinv = self.Kinv + else: + pass + self.mean_std(x) + return None + + def norm(self): + if self.fitted: + val = torch.sqrt(self.A.T @ self.kernel(self.x, self.x) @ self.A) + return val + else: + return None + + def beta(self, delta=1e-3, norm=1): + """ + return concentration parameter given the current estimates + + :param delta: failure probability + :param norm: norm assumption + :return: + """ + beta_value = self.s * norm + torch.sqrt( + 2 * torch.log(1.0 / delta + torch.log(torch.det(self.K) / self.s**self.n)) + ) + return beta_value + + def execute(self, xtest): + """ + Calculates the covariance between data and xtest + :param xtest: + :return: + """ + if self.fitted == True: + 
K_star = self.kernel(self.x, xtest) + else: + K_star = None + K_star_star = self.kernel(xtest, xtest) + return (K_star, K_star_star) + + def _huber_fit(self, K_star, newK=None): + alpha = cp.Variable(self.n) + self.jitter = 10e-5 + if newK is None: + K = self.kernel(self.x, self.x) + self.jitter * torch.eye( + self.n, dtype=torch.float64 + ) + else: + K = newK.detach() + K = cp.atoms.affine.wraps.psd_wrap(K) + objective = cp.Minimize( + cp.sum( + cp.huber( + (K @ alpha - self.y.view(-1).numpy()) / self.s, M=self.huber_delta + ) + ) + + self.lam * cp.quad_form(alpha, K) + ) + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK, enforce_dpp=False) + if K_star is not None: + return K_star @ torch.from_numpy(alpha.value).view(-1, 1) + else: + return torch.from_numpy(alpha.value).view(-1, 1) + + def _svr_fit(self, K_star, newK=None): + alpha = cp.Variable(self.n) + self.jitter = 10e-5 + if newK is None: + K = self.kernel(self.x, self.x) + self.jitter * torch.eye( + self.n, dtype=torch.float64 + ) + else: + K = newK.detach() + + K = cp.atoms.affine.wraps.psd_wrap(K) + objective = cp.Minimize(self.lam * cp.quad_form(alpha, K)) + constraints = [cp.abs(K @ alpha - self.y.view(-1).numpy()) <= self.svr_eps] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, enforce_dpp=False) + if K_star is not None: + return K_star @ torch.from_numpy(alpha.value).view(-1, 1) + else: + return torch.from_numpy(alpha.value).view(-1, 1) + + def _unif_fit(self, K_star, newK=None): + alpha = cp.Variable((self.n, 1)) + self.jitter = 10e-5 + if newK is None: + K = self.kernel(self.x, self.x) + self.jitter * torch.eye( + self.n, dtype=torch.float64 + ) + else: + K = newK.detach() + + K = cp.atoms.affine.wraps.psd_wrap(K) + con = ( + 2 + * self.total_bound + * self.prob + / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2)) + ) + objective = cp.Minimize( + cp.sum( + cp.logistic( + cp.square( + (K @ alpha - self.y.view(-1, 1).numpy()) / (np.sqrt(2) * self.s) + ) + + np.log(con) + ) + ) + + self.lam * cp.quad_form(alpha, K) + ) + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK, enforce_dpp=False) + if K_star is not None: + return K_star @ torch.from_numpy(alpha.value).view(-1, 1) + else: + return torch.from_numpy(alpha.value).view(-1, 1) + + def _unif_fit_torch(self, K_star, newK=None, warm_start=None): + self.jitter = 10e-5 + if newK is None: + K = self.kernel(self.x, self.x) + self.jitter * torch.eye( + self.n, dtype=torch.float64 + ) + else: + K = newK.detach() + + con = ( + 2 + * self.total_bound + * self.prob + / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2)) + ) + unif = ( + lambda alpha: torch.sum( + torch.log( + torch.exp( + ((K @ alpha - self.y.view(-1)) ** 2) / (2 * self.s**2) + + np.log(con) + ) + + 1 + ) + ) + + self.lam * alpha @ K @ alpha + ) + if warm_start is None: + x_init = torch.zeros(size=(self.n, 1)).view(-1).double() + else: + x_init = warm_start.view(-1) + + res = minimize_torch( + unif, + x_init, + method="l-bfgs", + tol=1e-3, + disp=0, + options={"max_iter": 200, "gtol": 1e-3}, + ) + alpha = res.x + + if K_star is not None: + return K_star @ alpha.view(-1, 1) + else: + return alpha.view(-1, 1) + + def _huber_fit_torch(self, K_star, newK=None): + self.jitter = 10e-5 + if newK is None: + K = self.kernel(self.x, self.x) + self.jitter * torch.eye( + self.n, dtype=torch.float64 + ) + else: + K = newK + L = torch.linalg.cholesky(K) + + huber = ( + lambda beta: torch.nn.functional.huber_loss( + L @ beta / self.s, + self.y.view(-1) / self.s, + reduction="sum", + 
delta=self.huber_delta, + ) + + self.lam * beta @ beta + ) + # x_init = torch.linalg.solve(L.T@L+torch.eye(self.n).double()*self.s**2*self.lam, self.y) + x_init = torch.zeros(size=(self.n, 1)).view(-1).double() + res = minimize_torch( + huber, + x_init, + method="l-bfgs", + tol=1e-4, + disp=0, + options={"max_iter": 10**3, "gtol": 1e-4}, + ) + alpha = torch.linalg.solve(L, res.x) + if K_star is not None: + return K_star @ alpha.view(-1, 1) + else: + return alpha.view(-1, 1) + + def mean_std(self, xtest, full=False, reuse=False): + if xtest.size()[0] < self.max_size: + return self.mean_std_sub(xtest, full=full, reuse=reuse) + else: + stepby = self.max_size + mu = torch.zeros(size=(xtest.size()[0], 1)).double() + std = torch.zeros(size=(xtest.size()[0], 1)).double() + + # first + i = 0 + mu[i * stepby : (i + 1) * stepby], std[i * stepby : (i + 1) * stepby] = ( + self.mean_std_sub(xtest[i * stepby : (i + 1) * stepby, :], reuse=False) + ) + + for i in np.arange(1, xtest.size()[0] // stepby, 1): + print(i, "/", xtest.size()[0] // stepby) + ( + mu[i * stepby : (i + 1) * stepby], + std[i * stepby : (i + 1) * stepby], + ) = self.mean_std_sub( + xtest[i * stepby : (i + 1) * stepby, :], reuse=True + ) + + # last + if xtest.size()[0] % stepby > 0: + ( + mu[xtest.size()[0] - xtest.size()[0] % stepby :], + std[xtest.size()[0] - xtest.size()[0] % stepby :], + ) = self.mean_std_sub( + xtest[xtest.size()[0] - xtest.size()[0] % stepby :, :], reuse=True + ) + + return mu, std + + def mean_std_sub(self, xtest, full=False, reuse=False): + """ + Return posterior mean and variance as tuple + :param xtest: grid, numpy array (2D) + :param full: Instead of just poinwise variance, full covariance can be outputed (bool) + :return: (tensor,tensor) + """ + if full: + (K_star, K_star_star) = self.execute(xtest) + else: + K_star = self.kernel(self.x, xtest) + diag_K_star_star = torch.hstack( + [ + self.kernel(xtest[i, :].view(1, -1), xtest[i, :].view(1, -1)).view( + 1 + ) + for i in range(xtest.size()[0]) + ] + ) + + if self.fitted == False: + # the process is not fitted + + if full == False: + x = torch.sum(xtest, dim=1) + # first = torch.diag(K_star_star).view(-1, 1) + first = diag_K_star_star.view(-1, 1) + variance = first + yvar = torch.sqrt(variance) + else: + x = torch.sum(xtest, dim=1) + first = K_star_star + yvar = first + + return (0 * x.view(-1, 1), yvar) + + else: + + if self.back_prop == False: + if reuse == False: + # self.decomp = torch.lu(self.K.unsqueeze(0)) + self.LU, self.pivot = torch.linalg.lu_factor(self.K.unsqueeze(0)) + # self.A = torch.lu_solve(self.y.unsqueeze(0), *self.decomp)[0, :, :] + self.A = torch.linalg.lu_solve( + self.LU, self.pivot, self.y.unsqueeze(0) + )[0, :, :] + self.B = torch.t( + torch.linalg.lu_solve( + self.LU, self.pivot, torch.t(K_star).unsqueeze(0) + )[0, :, :] + ) + else: + if reuse == False: + self.A = torch.linalg.lstsq(self.K, self.y)[0] + # self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star))) + self.B = torch.t(torch.linalg.lstsq(self.K, torch.t(K_star))[0]) + + if self.loss == "squared": + ymean = torch.mm(K_star, self.A) + elif self.loss == "huber": + ymean = self._huber_fit(K_star) + elif self.loss == "svr": + ymean = self._svr_fit(K_star) + elif self.loss == "unif" or self.loss == "unif_new": + ymean = self._unif_fit_torch(K_star) + else: + raise AssertionError("Loss function not implemented.") + + if full == False: + first = diag_K_star_star.view(-1, 1) + second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1) + variance = first - second 
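
For reference, the fitted branch of mean_std_sub above evaluates the textbook GP posterior, mu_* = K_* K^{-1} y and sigma_*^2 = k(x_*, x_*) - K_* K^{-1} K_*^T, with K = k(X, X) + Sigma^T Sigma. The short standalone sketch below reproduces just those two formulas outside the class; the sq_exp_kernel helper, the noise level s and the bandwidth gamma are illustrative assumptions, not part of stpy.

import torch

def sq_exp_kernel(a, b, gamma=0.5):
    # k(a, b) = exp(-||a - b||^2 / (2 * gamma^2))
    return torch.exp(-torch.cdist(a, b) ** 2 / (2 * gamma ** 2))

def gp_posterior(x, y, xtest, s=0.1, gamma=0.5):
    n = x.shape[0]
    K = sq_exp_kernel(x, x, gamma) + (s ** 2) * torch.eye(n, dtype=torch.float64)
    K_star = sq_exp_kernel(xtest, x, gamma)          # k(X_*, X)
    A = torch.linalg.solve(K, y)                     # K^{-1} y
    B = torch.linalg.solve(K, K_star.T)              # K^{-1} K_*^T
    mean = K_star @ A                                # mu_* = K_* K^{-1} y
    var = sq_exp_kernel(xtest, xtest, gamma).diagonal() - (K_star * B.T).sum(dim=1)
    return mean, torch.sqrt(var.clamp_min(0.0)).view(-1, 1)

x = torch.linspace(-1, 1, 8, dtype=torch.float64).view(-1, 1)
y = torch.sin(4 * x)
xtest = torch.linspace(-1, 1, 50, dtype=torch.float64).view(-1, 1)
mu, std = gp_posterior(x, y, xtest)
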
+ yvar = torch.sqrt(variance) + else: + first = K_star_star + second = torch.mm(self.B, torch.t(K_star)) + yvar = first - second + + return (ymean, yvar) + + def mean(self, xtest): + """ + Calculates the mean prediction over a specific input space + :param xtest: input + :return: + """ + K_star = self.kernel(self.x, xtest) + + if self.loss == "squared": + ymean = torch.mm(K_star, self.A) + elif self.loss == "huber": + ymean = self._huber_fit(K_star) + else: + raise AssertionError("Loss function not implemented.") + + return ymean + + def gradient_mean_var(self, point, hessian=True): + """ + Can calculate gradient at single point atm. + + :param point: + :return: + """ + + # mean + point.requires_grad_(True) + mu = self.mean_std(point)[0] + nabla_mu = grad(mu, point, create_graph=True)[0][0] + + if hessian == True: + # variance + H = self.kernel_object.get_2_der(point) + C = self.kernel_object.get_1_der(point, self.x) + + V = H - torch.t(C) @ self.K @ C + + return [nabla_mu, V] + else: + return nabla_mu + + def mean_gradient_hessian(self, xtest, hessian=False): + hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) + xtest.requires_grad_(True) + # xtest.retain_grad() + mu = self.mean_std(xtest)[0] + # mu.backward(retain_graph=True) + + # nabla_mu = xtest.grad + nabla_mu = grad(mu, xtest, create_graph=True)[0][0] + + if hessian == False: + return nabla_mu + else: + for i in range(self.d): + hessian_mu[i, :] = grad( + nabla_mu[i], xtest, create_graph=True, retain_graph=True + )[0][0] + return [nabla_mu, hessian_mu] + + def sample(self, xtest, size=1, jitter=10e-8): + """ + Samples Path from GP, return a numpy array evaluated over grid + :param xtest: grid + :param size: number of samples + :return: numpy array + """ + nn = list(xtest.size())[0] + + if self.fitted == True: + (ymean, yvar) = self.mean_std(xtest, full=True) + Cov = yvar + 10e-10 * torch.eye(nn, dtype=torch.float64) + L = torch.linalg.cholesky(Cov) + # L = torch.from_numpy(np.linalg.cholesky(Cov.numpy())) + random_vector = torch.normal( + mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0 + ) + f = ymean + torch.mm(L, random_vector) + else: + (K_star, K_star_star) = self.execute(xtest) + L = torch.linalg.cholesky( + K_star_star + jitter * torch.eye(nn, dtype=torch.float64) + ) + random_vector = torch.normal( + mean=torch.zeros(nn, size, dtype=torch.float64), std=1.0 + ) + f = self.mu + torch.mm(L, random_vector) + return f + + def sample_and_max(self, xtest, size=1): + """ + Samples Path from GP and takes argmax + :param xtest: grid + :param size: number of samples + :return: (argmax, max) + """ + f = self.sample(xtest, size=size) + self.temp = f + val, index = torch.max(f, dim=0) + return (xtest[index, :], val) + + def log_marginal(self, kernel, X, weight): + + if self.loss == "squared": + return self._log_marginal_squared(kernel, X, weight) + elif self.loss == "unif_new": + return self._log_marginal_unif(kernel, X, weight) + else: + return self._log_marginal_map(kernel, X, weight) + + def _log_marginal_unif(self, kernel, X, weight): + if not self.prepared_log_marginal: + self._prepare_log_marginal_unif() + + func = kernel.get_kernel() + self.jitter = 10e-4 + K = ( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.jitter + ) + # print ("Kernel") + # print (K) + L = torch.linalg.cholesky(K) + self.L_unif.value = L.data.numpy() + + self.prob_unif.solve(solver=cp.MOSEK, enforce_dpp=False, warm_start=True) + + solution = ( + torch.zeros(size=(self.n, 1), 
requires_grad=True).reshape(-1).double() + ) + solution.data = torch.from_numpy(self.beta_unif.value) + con = ( + 2 + * self.total_bound + * self.prob + / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2)) + ) + + loglikelihood = ( + lambda beta: torch.sum( + torch.log( + torch.exp( + ((L @ beta - self.y.view(-1)) ** 2) / (2 * self.s**2) + + np.log(con) + ) + + 1 + ) + ) + + self.lam * beta.T @ beta + ) + + H = hessian(loglikelihood)(solution) + logdet = -0.5 * torch.slogdet(H)[1] * weight + logprob = -0.5 * loglikelihood(solution) + logdet + logprob = -logprob + return logprob + + def _prepare_log_marginal_unif(self): + + self.beta_unif = cp.Variable(self.n) + self.L_unif = cp.Parameter((self.n, self.n)) + + con = ( + 2 + * self.total_bound + * self.prob + / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2)) + ) + # self.objective_unif = cp.Minimize(cp.sum(cp.logistic(cp.square( + # (self.K_unif @ self.alpha_unif - self.y.view(-1).numpy()) / (np.sqrt(2) * self.s)) + np.log(con))) + self.lam * cp.quad_form( + # self.alpha_unif, self.L)) + self.objective_unif = cp.Minimize( + cp.sum( + cp.logistic( + cp.square( + (self.L_unif @ self.beta_unif - self.y.view(-1).numpy()) + / (np.sqrt(2) * self.s) + ) + + np.log(con) + ) + ) + + self.lam * cp.sum_squares(self.beta_unif) + ) + self.prob_unif = cp.Problem(self.objective_unif) + self.prepared_log_marginal = True + + def _prepare_log_marginal_huber(self): + beta = cp.Variable(self.n) + L = cp.Parameter((self.n, self.n)) + + objective = cp.Minimize( + cp.sum( + cp.huber( + (L @ beta - self.y.view(-1).numpy()) / self.s, M=self.huber_delta + ) + ) + + self.lam * cp.sum_squares(beta) + ) + + prob = cp.Problem(objective) + cvxpylayer = CvxpyLayer(prob, parameters=[L], variables=[beta]) + self.prepared_log_marginal = True + print("cvxpy-layer has been initialized.") + return cvxpylayer + + def _log_marginal_huber_cvxpy(self, kernel, X, weight): + func = kernel.get_kernel() + self.jitter = 10e-4 + L_tch = torch.linalg.cholesky( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.jitter + ) + + if not self.prepared_log_marginal: + self._cvxpylayer = self._prepare_log_marginal_huber() + solution = self._cvxpylayer(L_tch)[0] + + huber = ( + lambda beta: torch.nn.functional.huber_loss( + L_tch @ beta / self.s, + self.y.view(-1) / self.s, + reduction="sum", + delta=self.huber_delta, + ) + + self.lam * beta.T @ beta + ) + H = torch.autograd.functional.hessian(huber, solution) + + logdet = -0.5 * torch.slogdet(H)[1] * weight + logprob = -0.5 * huber(solution) + logdet + logprob = -logprob + return logprob + + def _log_marginal_map(self, kernel, X, weight): + # this implementation uses Danskin theorem to simplify gradient propagation + func = kernel.get_kernel() + self.jitter = 10e-4 + K_tch = ( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.jitter + ) + + # solve + solution = ( + torch.zeros(size=(self.n, 1), requires_grad=True).reshape(-1).double() + ) + if self.warm_start_solution is None: + self.warm_start_solution = solution.clone() + + if self.loss == "huber": + alpha = self._huber_fit(None, newK=K_tch).detach() + loglikelihood = ( + lambda alpha: torch.nn.functional.huber_loss( + K_tch @ alpha / self.s, + self.y.view(-1) / self.s, + reduction="sum", + delta=self.huber_delta, + ) + + self.lam * alpha.T @ K_tch @ alpha + ) + + solution.data = alpha.reshape(-1).data + self.warm_start_solution.data = solution.data + mask = torch.abs(K_tch @ alpha - self.y) / self.s < self.huber_delta + mask = 
mask.view(-1).double() + D = torch.diag(mask) + H = K_tch @ D @ K_tch + 2 * self.lam * K_tch + + elif self.loss == "svr": + alpha = self._svr_fit(None, newK=K_tch).detach() + + loglikelihood = ( + lambda alpha: torch.sum( + torch.abs(K_tch @ alpha - self.y.view(-1)) + * (K_tch @ alpha - self.y.view(-1) > self.svr_eps).int() + ) + + self.lam * alpha.T @ K_tch @ alpha + ) + + solution.data = alpha.reshape(-1).data + self.warm_start_solution.data = solution.data + H = torch.autograd.functional.hessian(loglikelihood, solution) + + elif self.loss == "unif": + alpha = self._unif_fit_torch(None, newK=K_tch).detach() + con = ( + 2 + * self.total_bound + * self.prob + / ((1 - self.prob) * np.sqrt(2 * np.pi * self.s**2)) + ) + + loglikelihood = ( + lambda alpha: torch.sum( + torch.log( + torch.exp( + ((K_tch @ alpha - self.y.view(-1)) ** 2) / (2 * self.s**2) + + np.log(con) + ) + + 1 + ) + ) + + self.lam * alpha @ K_tch @ alpha + ) + # v = lambda alpha : torch.sum(torch.exp( ((K_tch@alpha-self.y.view(-1))**2)/(2*self.s**2) + np.log(con) )) + solution.data = alpha.reshape(-1).data + self.warm_start_solution.data = solution.data + H = hessian(loglikelihood)(solution) + + logdet = -0.5 * torch.slogdet(H)[1] * weight + logprob = -0.5 * loglikelihood(solution) + logdet + logprob = -logprob + return logprob + + def _log_marginal_squared(self, kernel, X, weight): + func = kernel.get_kernel() + K = ( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.s * self.s + ) + logdet = -0.5 * torch.slogdet(K)[1] * weight + alpha = torch.linalg.solve(K, self.y) + logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet + logprob = -logprob + return logprob + + def optimize_params( + self, + type="bandwidth", + restarts=10, + regularizer=None, + maxiter=1000, + mingradnorm=1e-4, + verbose=False, + optimizer="pymanopt", + scale=1.0, + weight=1.0, + save=False, + save_name="model.np", + init_func=None, + bounds=None, + parallel=False, + cores=None, + ): + + # Spectral norm regularizer + if regularizer is not None: + if regularizer[0] == "spectral_norm": + regularizer_func = lambda S: regularizer[1] * torch.norm( + 1 / S[0], p="nuc" + ) + elif regularizer[0] == "lasso": + regularizer_func = lambda S: regularizer[1] * torch.norm(1 / S[0], p=1) + else: + regularizer_func = None + else: + regularizer_func = None + + if type == "bandwidth": + params = {} + for key, dict2 in self.kernel_object.params_dict.items(): + if "gamma" in dict2.keys(): + params[key] = {"gamma": (init_func, Euclidean(1), bounds)} + elif "ard_gamma" in dict2.keys(): + params[key] = { + "ard_gamma": (init_func, Euclidean(len(dict2["group"])), bounds) + } + + elif type == "bandwidth+noise": + params = {} + init_func_noise = lambda x: self.s + for key, dict2 in self.kernel_object.params_dict.items(): + + if "gamma" in dict2.keys(): + params[key] = {"gamma": (init_func, Euclidean(1), bounds)} + + elif "ard_gamma" in dict2.keys(): + params[key] = { + "ard_gamma": (init_func, Euclidean(len(dict2["group"])), bounds) + } + + params["likelihood"] = {"sigma": (init_func_noise, Euclidean(1), None)} + + elif type == "rots": + params = {} + d = int(self.kernel_object.d) + for key, dict2 in self.kernel_object.params_dict.items(): + if "rot" in dict2.keys(): + params[key] = {"rot": (None, Stiefel(d, d), None)} + elif type == "groups": + params = {} + optimizer = "discrete" + d = self.kernel_object.d + for key, dict2 in self.kernel_object.params_dict.items(): + if "groups" in dict2.keys(): + params[key] = {"groups": (None, 
helper.generate_groups(d), None)} + pass + elif type == "covariance": + params = {} + d = int(self.kernel_object.d) + for key, dict2 in self.kernel_object.params_dict.items(): + if "cov" in dict2.keys(): + params[key] = {"cov": (None, PSDFixedRank(d, d), None)} + else: + raise AttributeError("This quick-optimization is not implemented.") + + self.optimize_params_general( + params=params, + restarts=restarts, + optimizer=optimizer, + regularizer_func=regularizer_func, + maxiter=maxiter, + mingradnorm=mingradnorm, + verbose=verbose, + scale=scale, + weight=weight, + save=save, + save_name=save_name, + parallel=parallel, + cores=cores, + ) + + def log_probability(self, xtest, sample): + from scipy.stats import multivariate_normal + + mu, covar = self.mean_std(xtest, full=True) + p = np.log( + multivariate_normal.pdf( + sample.view(-1).numpy(), mean=mu.view(-1).numpy(), cov=covar.numpy() + ) + ) + return p + + def volume_mean_cvxpy( + self, + xtest, + weights=None, + eps=10e-2, + tol=10e-14, + max_weight=1, + max_iter=1000, + verbose=False, + scale=10e-4, + slope=1.0, + bisections=10, + B="auto", + optimal_scale=None, + optimize_scale=False, + relax="relu", + ): + + n = self.x.size()[0] + K = self.get_kernel() # (self.x, self.x) + Kinv = torch.pinverse(K + eps * torch.eye(K.size()[0]).double()).numpy() + if weights is None: + weights = torch.ones(self.x.size()[0]) / n + if B == "auto": + alpha, _ = torch.lstsq(self.y, K) + beta = K @ alpha + B = beta.T @ Kinv @ beta + print("Auto:B", B) + + def fun(scale_arg): + beta = cp.Variable(n) + if relax == "relu": + loss_fn_transformed = cp.sum( + cp.pos( + weights + * slope + * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps) + ) + ) + 0.5 * scale_arg * cp.quad_form(beta, Kinv) + elif relax == "log": + loss_fn_transformed = cp.sum( + cp.logistic( + weights + * slope + * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps) + ) + ) + 0.5 * scale_arg * cp.quad_form(beta, Kinv) + + # loss_fn_transformed = cp.sum(weights*logit(slope*(cp.abs(beta - self.y.numpy().reshape(-1)) -eps))) + 0.5*scale_arg*cp.quad_form(beta, Kinv)- + + prob = cp.Problem(cp.Minimize(loss_fn_transformed)) + # prob.solve(solver=cp.MOSEK, feastol=tol, verbose=False) + prob.solve(solver=cp.MOSEK, verbose=False) + if verbose == True: + print( + "scale:", + scale_arg, + "cond:", + np.linalg.cond(Kinv), + "sub.", + beta.value.T @ Kinv @ beta.value - B, + "B:", + B, + ) + return beta.value.T @ Kinv @ beta.value - B + + if optimize_scale: + return helper.bisection(fun, 0.0, max_weight, bisections) + + if optimal_scale is None: + scale_star = helper.bisection(fun, 0.0, max_weight, bisections) + else: + scale_star = optimal_scale + + beta = cp.Variable(n) + if relax == "relu": + loss_fn_transformed = cp.sum( + weights + * cp.pos(slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps)) + ) + 0.5 * scale_star * cp.quad_form(beta, Kinv) + elif relax == "log": + loss_fn_transformed = cp.sum( + weights + * cp.logistic(slope * (cp.abs(beta - self.y.numpy().reshape(-1)) - eps)) + ) + 0.5 * scale_star * cp.quad_form(beta, Kinv) + prob = cp.Problem(cp.Minimize(loss_fn_transformed)) + # prob.solve(solver=cp.CVXOPT, feastol=tol, verbose=verbose) + prob.solve(solver=cp.MOSEK, verbose=verbose) + beta_torch = torch.from_numpy(beta.value).view(-1, 1) + alpha = torch.from_numpy(Kinv) @ beta_torch + ytest = self.kernel(self.x, xtest) @ alpha + return ytest + + def volume_mean( + self, + xtest, + weights=None, + eps=10e-2, + tol=10e-6, + max_iter=1000, + verbose=False, + eta_start=0.01, + eta_decrease=0.9, + 
scale=1, + slope=1.0, + warm=True, + relax="relu", + norm=False, + B="auto", + ): + self.scale = scale + self.relax = relax + + K = self.get_kernel() # (self.x, self.x) + Kinv = torch.pinverse(K) + + if weights is None: + weights = torch.ones(self.x.size()[0]) + else: + weights[weights < 10e-6] = 0.0 # * self.x.size()[0] + weights = weights.view(-1) + if warm == True: + # warm start with L2 fit + alpha, _ = torch.lstsq(self.y, K) + beta = K @ alpha + else: + beta = torch.randn(size=(self.n, 1)).double() # .requires_grad_(True)*0 + + # loss_fn_original = lambda alpha: torch.sum(torch.relu(torch.abs(K @ alpha - self.y) -eps)) + 0.5*self.s * alpha.T @ K @ alpha + if self.relax == "relu": + loss_fn_transformed = ( + lambda beta: torch.sum(torch.relu(torch.abs(beta - self.y) - eps)) + + self.scale * 0.5 * self.s * beta.T @ Kinv @ beta + ) + + elif self.relax == "tanh": + self.slope = slope + tanh = lambda x: (torch.tanh(self.slope * x) + 1) * 0.5 + loss_fn_transformed = ( + lambda beta: torch.sum( + weights * tanh(torch.abs(beta - self.y) - eps).view(-1) + ) + + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta + ) + + elif self.relax == "elu": + self.slope = slope + elu = lambda x: torch.nn.elu(x, alpha=self.slope) + loss_fn_transformed = ( + lambda beta: torch.sum(elu(torch.abs(beta - self.y) - eps)) + + 0.5 * self.s * self.scale * beta.T @ Kinv @ beta + ) + + elif self.relax == "relu": + return self.volume_mean_cvxpy( + xtest, weights=weights, eps=eps, scale=scale, tol=tol + ) + else: + raise AssertionError("Unkown relaxation.") + + current_loss = 10e10 + eta = eta_start + for i in range(max_iter): + grad = self.s * (Kinv @ beta) + beta = self.proximal(beta, grad, eta, eps, weights) + past_loss = current_loss + current_loss = loss_fn_transformed(beta) + if current_loss > past_loss: + eta = eta * eta_decrease + elif np.abs(current_loss - past_loss) < tol: + break + + # print (i, beta.T) + if verbose == True: + print(i, loss_fn_transformed(beta), eta) + + print("final norm:", beta.T @ Kinv @ beta) + + # alpha = torch.inverse(self.K) @ beta + alpha = torch.pinverse(K) @ beta + # alpha = torch.lstsq(K,beta) + ytest = self.kernel(self.x, xtest) @ alpha + # max = torch.max(torch.abs(beta - self.y)) + if norm == True: + return beta.T @ Kinv @ beta + # yz = self.kernel(self.x, self.x) @ alpha + # approx_v = torch.sum(torch.relu(torch.abs(beta - self.y) -eps))/max + # approx_p = approx_v/self.n + # mask = (torch.abs(yz[:,0] - self.y[:,0])) > eps + # approx_p = float(torch.sum(mask))/float(self.n) + return ytest # ,approx_p + + def volume_mean_norm( + self, + xtest, + weights=None, + eps=10e-2, + tol=10e-6, + max_iter=1000, + verbose=False, + eta_start=0.01, + eta_decrease=0.9, + scale=1, + slope=1.0, + warm=True, + relax="relu", + B="auto", + ): + K = self.kernel(self.x, self.x) + Kinv = torch.pinverse(K) + if B == "auto": + alpha, _ = torch.lstsq(self.y, self.K) + beta = K @ alpha + B = beta.T @ Kinv @ beta + + func = ( + lambda s: self.volume_mean( + xtest, + weights=weights, + eps=eps, + tol=tol, + max_iter=max_iter, + verbose=verbose, + eta_start=eta_start, + eta_decrease=eta_decrease, + scale=s, + slope=slope, + warm=warm, + relax=relax, + norm=True, + ) + - B + ) + + s_star = stpy.optim.custom_optimizers.bisection(func, 0.0, 1000.0, 10) + + return self.volume_mean( + xtest, + weights=weights, + eps=eps, + tol=tol, + max_iter=max_iter, + verbose=verbose, + eta_start=eta_start, + eta_decrease=eta_decrease, + scale=s_star, + slope=slope, + warm=warm, + relax=relax, + norm=False, + ) + + def 
proximal(self, beta, nabla, eta, eps, weights): + res = beta + for i in range(self.n): + from scipy.optimize import minimize + + b = float(beta[i, :]) + y = float(self.y[i, :]) + g = float(nabla[i, :]) + w = float(weights[i]) + # s = float(self.s) + + tanh = lambda x: (np.tanh(self.slope * x) + 1) * 0.5 + elu = lambda x: torch.elu(x, alpha=self.slope).numpy() + + if self.relax == "relu": + loss_reg = lambda x: w * np.maximum(0, np.abs(x - y) - eps) + elif self.relax == "tanh": + loss_reg = lambda x: w * tanh(np.abs(x - y) - eps) + elif self.relax == "elu": + loss_reg = lambda x: w * elu(np.abs(x - y) - eps) + else: + raise AssertionError("Unkown relaxation.") + + loss_scalar = lambda x: ( + (1 / (2.0 * eta)) * (x - (b - eta * g)) ** 2 + ) + loss_reg(x) + + x0 = np.array([0.0]) + # print (minimize(loss_scalar,x0,method ='nelder-mead').x) + res[i, :] = float(minimize(loss_scalar, x0, method="nelder-mead").x) + return res + + def get_lambdas(self, beta, mean=False): + """ + Gets lambda function to evaluate acquisiton function and its derivative + :param beta: beta in GP-UCB + :return: [lambda,lambda] + """ + mean = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[0][0][0] + sigma = lambda x: self.mean_std(x.reshape(1, -1), reuse=True)[1][0][0] + + if mean == True: + return [mean, sigma] + else: + fun = lambda x: -(mean(x) + np.sqrt(beta) * sigma(x)) + grad = lambda x: -complex_step_derivative(fun, 1e-10, x.reshape(1, -1)) + + return [fun, grad] + + def get_kernel(self): + return self.K + + def ucb_optimize(self, beta, multistart=25, lcb=False): + """ + Optimizes UCB acquisiton function and return next point and its value as output + :param beta: beta from GP UCB + :param multistart: number of starts + :return: (next_point, value at next_point) + """ + + mean = lambda x: self.mean_std(x, reuse=True)[0][0][0] + sigma = lambda x: self.mean_std(x, reuse=True)[1][0][0] + + ucb = lambda x: torch.dot( + torch.Tensor([1.0, np.sqrt(beta)]), + torch.Tensor( + [ + self.mean_std(x, reuse=True)[0][0][0], + self.mean_std(x, reuse=True)[1][0][0], + ] + ), + ) + lcb = lambda x: torch.dot( + torch.Tensor([1.0, np.sqrt(beta)]), + torch.Tensor( + [ + self.mean_std(x, reuse=True)[0][0][0], + -self.mean_std(x, reuse=True)[1][0][0], + ] + ), + ) + + if lcb == False: + fun2 = lambda x: -ucb(torch.from_numpy(x).view(1, -1)).numpy() + else: + fun2 = lambda x: -lcb(torch.from_numpy(x).view(1, -1)).numpy() + fun = lambda x: -( + mean(torch.from_numpy(x).view(1, -1)) + + np.sqrt(beta) * sigma(torch.from_numpy(x).view(1, -1)) + ) + + self.back_prop = False + self.mean_std(self.x) + + mybounds = self.bounds + + results = [] + + from scipy.optimize import minimize + + for i in range(multistart): + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + res = minimize( + fun2, x0, method="L-BFGS-B", jac=None, tol=0.000001, bounds=mybounds + ) + solution = res.x + results.append([solution, -fun(solution)]) + + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + + return (torch.from_numpy(solution), -fun(solution)) + + def isin(self, xnext): + self.epsilon = 0.001 + for v in self.x: + if torch.norm(v - xnext, p=2) < self.epsilon: + return True + + def sample_and_condition(self, x): + xprobe = x.view(1, -1) + fprobe = self.sample(xprobe) + if not self.isin(xprobe): + self.x = torch.cat((self.x, xprobe), dim=0) + self.y = torch.cat((self.y, fprobe), dim=0) + self.fit_gp(self.x, self.y) + return -fprobe + + def 
get_lambdas_TH(self): + fun = lambda x: self.sample_and_condition(x) + grad = None + return [fun, grad] + + def sample_iteratively_max( + self, xtest, multistart=20, minimizer="coordinate-wise", grid=100 + ): + """ + Samples Path from GP and takes the maximum iteratively + :param xtest: grid + :param size: number of samples + :return: numpy array + """ + # print ("Iterative:",multistart,minimizer,grid) + from scipy.optimize import minimize + + # old stuff + xold = self.x + yold = self.y + + # with fixed grid + if xtest is not None: + # number of samples + nn = xtest.shape[0] + + f = torch.zeros(nn, dtype=torch.float64) + + for j in range(nn): + xprobe = xtest[j, :].view(1, -1) + (K_star, K_star_star) = self.execute(xprobe) + (ymean, yvar) = self.mean_std(xprobe) + L = torch.sqrt( + K_star_star + + self.s * self.s * torch.eye(1, dtype=torch.float64) + - yvar + ) + fprobe = ymean + L * torch.randn(1, dtype=torch.float64) + # add x and fprobe to the dataset and redo the whole + f[j] = fprobe + if not self.isin(xprobe): + self.x = torch.cat((self.x, xprobe), dim=0) + self.y = torch.cat((self.y, fprobe), dim=0) + + self.fit_gp(self.x, self.y) + + val, index = torch.max(f, dim=0) + self.fit_gp(xold, yold) + return (xtest[index, :], f[index]) + + else: + # Iterative without grid + + # get bounds + if self.bounds == None: + mybounds = tuple( + [(-self.diameter, self.diameter) for i in range(self.d)] + ) + else: + mybounds = self.bounds + [fun, grad] = self.get_lambdas_TH() + + results = [] + for j in range(multistart): + + # print ("Multistart:",j) + x0 = torch.randn(self.d, dtype=torch.float64) + for i in range(self.d): + x0[i].uniform_(mybounds[i][0], mybounds[i][1]) + + # simple coordnate-wise optimization + if minimizer == "coordinate-wise": + solution = x0 + for i in range(self.d): + xtest = torch.from_numpy(np.tile(x0, (grid, 1))) + xtest[:, i] = torch.linspace( + mybounds[i][0], mybounds[i][1], grid + ) + sample = self.sample(xtest) + + ## Add to the posterior + self.x = torch.cat((self.x, xtest), dim=0) + self.y = torch.cat((self.y, sample), dim=0) + + # argmax + val, index = torch.max(sample, dim=0) + out = xtest[index, :] + + # fit new GP + self.fit_gp(self.x, self.y) + solution[i] = out[0, i] + + elif minimizer == "L-BFGS-B": + solution = np.random.randn(self.d) + xmax = [b[1] for b in mybounds] + xmin = [b[0] for b in mybounds] + bounds = MyBounds(xmax=xmax, xmin=xmin) + func = lambda x: fun(torch.from_numpy(x)).numpy()[0][0] + res = scipy.optimize.basinhopping( + func, solution, disp=False, niter=grid, accept_test=bounds + ) + solution = torch.from_numpy(res.x) + + else: + raise AssertionError("Wrong optimizer selected.") + + results.append(torch.cat((solution, -fun(solution)[0]))) + self.x = xold + self.y = yold + self.fit_gp(self.x, self.y) + + results = torch.stack(results) + val, index = torch.max(results[:, -1], dim=0) + solution = results[index, 0 : self.d].view(1, self.d) + self.x = xold + self.y = yold + self.fit_gp(self.x, self.y) + + return (solution, -fun(solution)) if __name__ == "__main__": - from stpy.helpers.helper import interval - # domain size - L_infinity_ball = 1 - # dimension - d = 1 - # error variance - s = torch.from_numpy(np.array(1.0, dtype=np.float64)) - - # grid density - n = 1024 - # number of intial points - N = 32 - # smoothness - gamma = 0.1 - # test problem - - xtest = torch.from_numpy(interval(n, d)) - # x = torch.from_numpy(np.random.uniform(-L_infinity_ball,L_infinity_ball, size = (N,d))) - x = torch.from_numpy(interval(N, 1)) - f_no_noise = lambda 
q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) - f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1., - out=None) * s * s - # targets - y = f(x) - - # GP model with squared exponential - kernel = KernelFunction(kernel_name = "ard", gamma = torch.ones(d, dtype = torch.float64)*gamma , groups = [[0],[1]]) - # kernel = KernelFunction(kernel_name="ard", gamma=torch.ones(1, dtype=torch.float64) * gamma, groups=[[0]]) - GP = GaussianProcess(s=s, d=1) - - # fit GP - # x = x.numpy() - GP.fit_gp(x, y) - # get mean and variance of GP - [mu, std] = GP.mean_std(xtest) - - # print ("Log probability:", GP.log_marginal_likelihood() ) - # mu_inf = GP.chebyshev_mean(xtest) - eps = 0.1 - - mu_vol = GP.volume_mean_cvxpy(xtest, eps=eps, verbose=True, scale=1., slope=1., tol=10e-9) - - GP.visualize(xtest, f_true=f_no_noise, show=False) - plt.plot(xtest.numpy(), mu_vol.detach().numpy(), label="Least-Volume-ReLu", lw=2) - for slope in [0.001, 0.01, 0.1, 1., 10., 100., 1000., 10000.]: - # mu_vol_log = GP.volume_mean_cvxpy(xtest, eps=eps, verbose=True, scale=1., slope=slope, tol=10e-9, relax = 'log', B = 1000) - # plt.plot(xtest.numpy(),mu_vol_log.detach().numpy(), '--',label = "Least-Volume-Log" + str(slope), lw = 2) - mu_vol_tanh = GP.volume_mean(xtest, eps=eps, verbose=True, eta_start=0.1, eta_decrease=0.1, scale=1., - slope=slope, - tol=0.01, warm=True, relax='tanh') - plt.plot(xtest.numpy(), mu_vol_tanh.detach().numpy(), '-.', label="Least-Volume-Tanh" + str(slope), lw=2) - # print (slope, np.sum(np.abs(mu_vol_log) 0: - # something to add via low rank update - for i in range(len(self.to_add)): - newx = self.to_add[i][0] - newy = self.to_add[i][1] - - # rank one update - emb = self.embed(newx) - - if self.dual: # via Shur complements - newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double() - newK = torch.zeros(size=(self.n + 1, self.n + 1)).double() - - M = self.invK @ self.Q - c = 1. / ((self.s ** 2 * self.lam + emb @ emb.T) - emb @ self.Q.T @ M @ emb.T) - - newKinv[0:self.n, 0:self.n] = self.invK + c * M @ emb.T @ emb @ M.T - newKinv[0:self.n, self.n] = (- M @ emb.T * c).view(-1) - newKinv[self.n, 0:self.n] = (- emb @ M.T * c).view(-1) - newKinv[self.n, self.n] = c.view(-1) - - newK[0:self.n, 0:self.n] = self.K - newK[0:self.n, self.n] = emb @ self.Q.T - newK[self.n, 0:self.n] = emb @ self.Q.T - newK[self.n, self.n] = self.s ** 2 * self.lam + emb @ emb.T - self.K = newK - - self.invK = newKinv - - self.add_points(newx, newy) - self.n = self.n + 1 - self.Q = self.embed(self.x) - self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))) - - else: # via Woodbury - c = 1 + emb @ self.invV @ emb.T - self.invV = self.invV - (self.invV @ emb.T @ emb @ self.invV) / c - self.add_points(newx, newy) - self.n = self.n + 1 - self.Q = self.embed(self.x) - # add point - - self.check_conversion() - - self.fitted = True - self.to_add = [] - - - elif self.data == True: # just compute the - self.Q = self.embed(self.x) - if self.dual: - I = torch.eye(self.n).double() - Z_ = self.Q @ self.Q.T - self.K = Z_ + self.s * self.s * self.lam * I - # self.invK, _ = torch.solve(I, self.K) - self.invK = torch.pinverse(self.K) - self.invK_V = (1. 
/ self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))) - else: - I = torch.eye(int(self.m)).double() - Z_ = self.Q.T @ self.Q - self.V = Z_ + self.s ** 2 * self.lam * I - self.invV = torch.pinverse(self.V) - - self.fitted = True - else: - pass - else: - pass - - def theta_mean(self, var=False, prior=False): - self.precompute() - - if self.fitted == True and prior == False: - if self.dual: - theta_mean = self.Q.T @ self.invK @ self.y - Z = self.invK_V - else: - theta_mean = self.invV @ self.Q.T @ self.y - Z = self.s ** 2 * self.invV - else: - theta_mean = 0 * torch.ones(size=(self.m, 1)).double() - - if var is False: - return theta_mean - else: - return (theta_mean, Z) - - def mean(self, xtest): - return self.mean_std(xtest)[0] - - def mean_std(self, xtest): - ''' - Calculate mean and variance for GP at xtest points - ''' - self.precompute() - embeding = self.embed(xtest) - - # mean - theta_mean = self.theta_mean() - # print(torch.norm(theta_mean)) - ymean = embeding @ theta_mean - - # std - if not self.dual or self.primal: - diagonal = self.s ** 2 * torch.einsum('ij,jk,ik->i', (embeding, self.invV, embeding)).view(-1, 1) - else: - diagonal = torch.einsum('ij,jk,ik->i', (embeding, self.invK_V, embeding)).view(-1, 1) - - ystd = torch.sqrt(diagonal) - return (ymean, ystd) - - def ucb(self, xtest, delta=0.1): - mu, std = self.mean_std(xtest) - res = mu + np.sqrt(self.beta(delta=delta)) * std - return res - - def lcb(self, xtest, delta=0.1): - mu, std = self.mean_std(xtest) - res = mu - np.sqrt(self.beta(delta=delta)) * std - return res - - def sample_matheron(self, xtest, kernel_object, size=1): - basis = self.get_basis_size() - zeros = torch.zeros(size=(basis, size), dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.) - - Z = self.lam * torch.eye(basis, dtype=torch.float64) - L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() - theta = torch.mm(L, random_vector) + self.prior_mean - - f_prior_xtest = torch.mm(self.embed(xtest), theta) - f_prior_x = torch.mm(self.embed(self.x), theta) - - K_star = kernel_object.kernel(self.x, xtest) - N = self.x.size()[0] - K = kernel_object.kernel(self.x, self.x) + self.s ** 2 * self.lam * torch.eye(N) - - f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x) - return f - - def sample_theta(self, size=1, prior=False): - - basis = self.get_basis_size() - - zeros = torch.zeros(size=(basis, size), dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.) 
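(Note on the sampling step below: it draws theta = theta_mean + s * chol(V^{-1}) @ z with z ~ N(0, I), i.e. a posterior sample theta ~ N(theta_mean, s^2 * V^{-1}), which matches the covariance Z = s^2 * invV that theta_mean(var=True) returns in the primal branch.)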
- self.precompute() - - if self.fitted == True and prior == False: - self.L = torch.linalg.cholesky(self.get_invV()) * self.s - theta = self.theta_mean() - theta = theta + torch.mm(self.L, random_vector) - else: - Z = self.lam * torch.eye(basis, dtype=torch.float64) - L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() - theta = torch.mm(L, random_vector) + self.prior_mean - - return theta - - def theta_mean_constrained(self, weights=None, B=1): - if weights is None: - weights = torch.ones(self.n).double() / self.n - - Q = self.embed(self.x) - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize( - cp.sum(weights @ cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()))) - zero = np.zeros(int(torch.sum(self.m))) - constraints = [cp.SOC(theta @ zero + B, theta)] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value).view(-1, 1) - - def theta_absolute_deviation(self, weights=None, reg=None): - if weights is None: - weights = torch.ones(self.x.size()[0]) - - if reg is None: # standard regularization - Q = self.embed(self.x) - theta = cp.Variable((int(torch.sum(self.m)), 1)) - objective = cp.Minimize( - cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + self.s * self.lam * cp.norm2(theta)) - prob = cp.Problem(objective) - prob.solve() - return torch.from_numpy(theta.value) - else: # custom regularization - Q = self.embed(self.x) - theta = cp.Variable((int(torch.sum(self.m)), 1)) - objective = cp.Minimize( - cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + reg * cp.norm2(theta)) - prob = cp.Problem(objective) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value) - - def theta_absolute_deviation_constrained(self, weights=None, B=1): - if weights is None: - weights = torch.ones(self.x.size()[0]) - Q = self.embed(self.x) - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - - objective = cp.Minimize(cp.sum(weights @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()))) - zero = np.zeros(int(torch.sum(self.m))) - constraints = [cp.SOC(theta @ zero + B, theta)] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value).view(-1, 1) - - def theta_chebyschev_approximation(self, eps=1.): - Q = self.embed(self.x).detach().numpy() - y = self.y.view(-1).detach().numpy() - - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize(cp.sum_squares(theta)) - constraints = [cp.abs(Q @ theta - y) <= eps] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - res = torch.from_numpy(theta.value).view(-1, 1) - return res - - def interpolation(self, eps=0.): - Q = self.embed(self.x).detach().numpy() - y = self.y.view(-1).detach().numpy() - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize(cp.sum_squares(theta)) - constraints = [Q @ theta == y] - - prob = cp.Problem(objective, constraints) - prob.solve() - res = torch.from_numpy(theta.value).view(-1, 1) - - return res - - def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None): - embeding = self.embed(xtest) - - if B is not None: - theta_mean = self.theta_mean_constrained(weights=weights, B=B) - else: - theta_mean = self.theta_mean(weights=weights, reg=reg) - ymean = torch.mm(embeding, theta_mean) - if theta == True: - return ymean, theta_mean - else: - return ymean - 
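As a reading aid for this hunk, a minimal usage sketch of the feature-based estimator being reformatted here, mirroring the __main__ example further down in this diff; the toy target, noise level and RFF bandwidth are illustrative assumptions, and RFFEmbedding, KernelizedFeatures and interval are assumed to be imported as in that example.

import numpy as np
import torch

# toy 1-d regression problem (illustrative values, as in the __main__ block)
d, m, s = 1, 128, 0.1
x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(10, d)))   # training inputs (float64)
y = torch.sin(4 * x) ** 2 - 0.1 + s * torch.randn(10, 1).double()  # noisy targets

emb = RFFEmbedding(m=m, gamma=0.1)                        # random Fourier feature map
model = KernelizedFeatures(embedding=emb, m=m, d=d, s=s)  # feature-based GP surrogate
model.fit_gp(x, y)                                        # store data and precompute V (or K)

xtest = torch.from_numpy(interval(256, d))                # dense test grid
mu, std = model.mean_std(xtest)                           # posterior mean and pointwise std
paths = model.sample(xtest, size=5)                       # joint posterior samples via sample_theta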
- def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False): - embeding = self.embed(xtest) - if B is not None: - theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B) - else: - theta_mean = self.theta_absolute_deviation(weights=weights) - ymean = torch.mm(embeding, theta_mean) - if theta == True: - return ymean, theta_mean - else: - return ymean - - """ + """ + Random Fourier Features for Gaussian Kernel + """ + + def __init__( + self, + embedding, + m, + s=0.001, + lam=1.0, + d=1, + diameter=1.0, + theta_norm=1.0, + verbose=True, + groups=None, + bounds=None, + scale=1.0, + kappa=1.0, + poly=2, + primal=True, + beta_fun=None, + bound=1, + ): + + self.s = s + self.lam = lam + self.primal = primal + self.x = None + + self.K = torch.ones(size=(1, 1)).double() + self.mu = 0.0 + + self.m = torch.from_numpy(np.array(m)) + self.fitted = False + self.data = False + + self.d = d + self.n = 0 + self.bounds = bounds + self.groups = groups + self.diameter = diameter + self.theta_norm = theta_norm + + self.verbose = verbose + self.admits_first_order = True + + self.embedding = embedding + self.embedding_map = embedding + + self.kappa = kappa + self.scale = scale + self.poly = poly + + self.to_add = [] + self.prior_mean = 0 + self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel + self.dual = False + self.beta_fun = beta_fun + self.bound = bound + + def beta(self, delta=0.1, norm=None): + # self.K = Z_ + self.s * self.s * self.lam * I + if norm is None: + norm = self.theta_norm + + if self.beta_fun is None: + return 2.0 + + elif self.beta_fun == "theory": + K = ( + self.kernel(self.x, self.x) + + torch.eye(self.x.size()[0]).double() * self.s**2 * self.lam + ) + + beta_value = ( + self.bound * self.lam + + torch.logdet(K / ((self.s**2) * self.lam)) + + 2 * np.log(1 / delta) + ) + Q = self.embed(self.x) + Lam = self.lam * torch.eye(self.get_basis_size()).double() + V = Q.T @ Q / (self.s**2) + Lam + + beta_value = ( + self.bound * self.lam + + torch.logdet(V) + - torch.logdet(Lam) + + 2 * np.log(1 / delta) + ) + beta_value = beta_value + else: + return self.beta_fun(self.K, delta=delta, norm=norm) + return beta_value + + def description(self): + return "Custom Features object" + + def embed(self, x): + return self.embedding.embed(x) + + def set_embedding(self, embed): + self.embedding_map = embed + + def get_basis_size(self): + return int(torch.sum(self.m)) + + def set_basis_size(self, m): + self.m = m + + def kernel(self, x, y): + embedding = self.embed(x) + embedding2 = self.embed(y) + K = self.linear_kernel(embedding, embedding2) + return K + + def logdet_ratio(self): + I = torch.eye(int(torch.sum(self.m))).double() + return torch.logdet(self.K) - torch.logdet(self.s**2 * self.lam * I) + + def effective_dim(self, xtest): + Phi = self.embed(xtest) + d = torch.trace( + torch.solve( + Phi.T @ Phi, + Phi.T @ Phi + torch.eye(self.get_basis_size()).double() * self.lam, + )[0] + ) + return d + + def add_data_point(self, x, y): + if self.n == 0: + self.fit_gp(x, y) + else: + self.to_add.append([x, y]) + self.fitted = False + + def fit(self, x=None, y=None): + self.fit_gp(self.x, self.y) + + def fit_gp(self, x, y): + """ + Function to Fit GP + """ + self.x = x + self.y = y + self.n = list(self.x.size())[0] + self.d = list(self.x.size())[1] + + if self.n < self.m: + self.dual = True + else: + self.dual = False + + if self.primal == True: + self.dual = False + + self.data = True + self.fitted = False + self.precompute() + return None + + def 
add_points(self, d): + x, y = d + if self.x is not None: + self.x = torch.cat((self.x, x), dim=0) + self.y = torch.cat((self.y, y), dim=0) + else: + self.x = x + self.y = y + + def check_conversion(self): + """ + Convert between dual and primal form + :return: + """ + if self.primal == False: + if self.n == self.m: # convert do d mode + print("Switching mode to primal.") + self.dual = False + + I = torch.eye(int(self.m)).double() + Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) + self.V = Z_ + self.s * self.s * self.lam * torch.eye( + int(self.m), dtype=torch.float64 + ) + self.invV, _ = torch.solve(I, self.V) + + def get_invV(self): + self.precompute() + + if self.dual: + I = torch.eye(self.m).double() + Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) + self.V = Z_ + self.s * self.s * self.lam * torch.eye( + self.m, dtype=torch.float64 + ) + self.invV = torch.linalg.solve(self.V, I) + return self.invV + else: + return self.invV + + def precompute(self): + + if self.fitted == False: + if len(self.to_add) > 0: + # something to add via low rank update + for i in range(len(self.to_add)): + newx = self.to_add[i][0] + newy = self.to_add[i][1] + + # rank one update + emb = self.embed(newx) + + if self.dual: # via Shur complements + newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double() + newK = torch.zeros(size=(self.n + 1, self.n + 1)).double() + + M = self.invK @ self.Q + c = 1.0 / ( + (self.s**2 * self.lam + emb @ emb.T) + - emb @ self.Q.T @ M @ emb.T + ) + + newKinv[0 : self.n, 0 : self.n] = ( + self.invK + c * M @ emb.T @ emb @ M.T + ) + newKinv[0 : self.n, self.n] = (-M @ emb.T * c).view(-1) + newKinv[self.n, 0 : self.n] = (-emb @ M.T * c).view(-1) + newKinv[self.n, self.n] = c.view(-1) + + newK[0 : self.n, 0 : self.n] = self.K + newK[0 : self.n, self.n] = emb @ self.Q.T + newK[self.n, 0 : self.n] = emb @ self.Q.T + newK[self.n, self.n] = self.s**2 * self.lam + emb @ emb.T + self.K = newK + + self.invK = newKinv + + self.add_points(newx, newy) + self.n = self.n + 1 + self.Q = self.embed(self.x) + self.invK_V = (1.0 / self.lam) * ( + -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)) + ) + + else: # via Woodbury + c = 1 + emb @ self.invV @ emb.T + self.invV = ( + self.invV - (self.invV @ emb.T @ emb @ self.invV) / c + ) + self.add_points(newx, newy) + self.n = self.n + 1 + self.Q = self.embed(self.x) + # add point + + self.check_conversion() + + self.fitted = True + self.to_add = [] + + elif self.data == True: # just compute the + self.Q = self.embed(self.x) + if self.dual: + I = torch.eye(self.n).double() + Z_ = self.Q @ self.Q.T + self.K = Z_ + self.s * self.s * self.lam * I + # self.invK, _ = torch.solve(I, self.K) + self.invK = torch.pinverse(self.K) + self.invK_V = (1.0 / self.lam) * ( + -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)) + ) + else: + I = torch.eye(int(self.m)).double() + Z_ = self.Q.T @ self.Q + self.V = Z_ + self.s**2 * self.lam * I + self.invV = torch.pinverse(self.V) + + self.fitted = True + else: + pass + else: + pass + + def theta_mean(self, var=False, prior=False): + self.precompute() + + if self.fitted == True and prior == False: + if self.dual: + theta_mean = self.Q.T @ self.invK @ self.y + Z = self.invK_V + else: + theta_mean = self.invV @ self.Q.T @ self.y + Z = self.s**2 * self.invV + else: + theta_mean = 0 * torch.ones(size=(self.m, 1)).double() + + if var is False: + return theta_mean + else: + return (theta_mean, Z) + + def mean(self, xtest): + return self.mean_std(xtest)[0] + + def mean_std(self, xtest): + """ + 
Calculate mean and variance for GP at xtest points + """ + self.precompute() + embeding = self.embed(xtest) + + # mean + theta_mean = self.theta_mean() + # print(torch.norm(theta_mean)) + ymean = embeding @ theta_mean + + # std + if not self.dual or self.primal: + diagonal = self.s**2 * torch.einsum( + "ij,jk,ik->i", (embeding, self.invV, embeding) + ).view(-1, 1) + else: + diagonal = torch.einsum( + "ij,jk,ik->i", (embeding, self.invK_V, embeding) + ).view(-1, 1) + + ystd = torch.sqrt(diagonal) + return (ymean, ystd) + + def ucb(self, xtest, delta=0.1): + mu, std = self.mean_std(xtest) + res = mu + np.sqrt(self.beta(delta=delta)) * std + return res + + def lcb(self, xtest, delta=0.1): + mu, std = self.mean_std(xtest) + res = mu - np.sqrt(self.beta(delta=delta)) * std + return res + + def sample_matheron(self, xtest, kernel_object, size=1): + basis = self.get_basis_size() + zeros = torch.zeros(size=(basis, size), dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0) + + Z = self.lam * torch.eye(basis, dtype=torch.float64) + L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() + theta = torch.mm(L, random_vector) + self.prior_mean + + f_prior_xtest = torch.mm(self.embed(xtest), theta) + f_prior_x = torch.mm(self.embed(self.x), theta) + + K_star = kernel_object.kernel(self.x, xtest) + N = self.x.size()[0] + K = kernel_object.kernel(self.x, self.x) + self.s**2 * self.lam * torch.eye(N) + + f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x) + return f + + def sample_theta(self, size=1, prior=False): + + basis = self.get_basis_size() + + zeros = torch.zeros(size=(basis, size), dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0) + self.precompute() + + if self.fitted == True and prior == False: + self.L = torch.linalg.cholesky(self.get_invV()) * self.s + theta = self.theta_mean() + theta = theta + torch.mm(self.L, random_vector) + else: + Z = self.lam * torch.eye(basis, dtype=torch.float64) + L = ( + torch.linalg.cholesky(Z.transpose(-2, -1).conj()) + .transpose(-2, -1) + .conj() + ) + theta = torch.mm(L, random_vector) + self.prior_mean + + return theta + + def theta_mean_constrained(self, weights=None, B=1): + if weights is None: + weights = torch.ones(self.n).double() / self.n + + Q = self.embed(self.x) + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize( + cp.sum( + weights + @ cp.square( + Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy() + ) + ) + ) + zero = np.zeros(int(torch.sum(self.m))) + constraints = [cp.SOC(theta @ zero + B, theta)] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value).view(-1, 1) + + def theta_absolute_deviation(self, weights=None, reg=None): + if weights is None: + weights = torch.ones(self.x.size()[0]) + + if reg is None: # standard regularization + Q = self.embed(self.x) + theta = cp.Variable((int(torch.sum(self.m)), 1)) + objective = cp.Minimize( + cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + + self.s * self.lam * cp.norm2(theta) + ) + prob = cp.Problem(objective) + prob.solve() + return torch.from_numpy(theta.value) + else: # custom regularization + Q = self.embed(self.x) + theta = cp.Variable((int(torch.sum(self.m)), 1)) + objective = cp.Minimize( + cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + + reg * cp.norm2(theta) + ) + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value) + + 
def theta_absolute_deviation_constrained(self, weights=None, B=1): + if weights is None: + weights = torch.ones(self.x.size()[0]) + Q = self.embed(self.x) + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + + objective = cp.Minimize( + cp.sum( + weights + @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()) + ) + ) + zero = np.zeros(int(torch.sum(self.m))) + constraints = [cp.SOC(theta @ zero + B, theta)] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value).view(-1, 1) + + def theta_chebyschev_approximation(self, eps=1.0): + Q = self.embed(self.x).detach().numpy() + y = self.y.view(-1).detach().numpy() + + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize(cp.sum_squares(theta)) + constraints = [cp.abs(Q @ theta - y) <= eps] + + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + res = torch.from_numpy(theta.value).view(-1, 1) + return res + + def interpolation(self, eps=0.0): + Q = self.embed(self.x).detach().numpy() + y = self.y.view(-1).detach().numpy() + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize(cp.sum_squares(theta)) + constraints = [Q @ theta == y] + + prob = cp.Problem(objective, constraints) + prob.solve() + res = torch.from_numpy(theta.value).view(-1, 1) + + return res + + def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None): + embeding = self.embed(xtest) + + if B is not None: + theta_mean = self.theta_mean_constrained(weights=weights, B=B) + else: + theta_mean = self.theta_mean(weights=weights, reg=reg) + ymean = torch.mm(embeding, theta_mean) + if theta == True: + return ymean, theta_mean + else: + return ymean + + def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False): + embeding = self.embed(xtest) + if B is not None: + theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B) + else: + theta_mean = self.theta_absolute_deviation(weights=weights) + ymean = torch.mm(embeding, theta_mean) + if theta == True: + return ymean, theta_mean + else: + return ymean + + """ Hessian """ - def mean_gradient_hessian(self, xtest, hessian=False): - hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) - xtest.requires_grad_(True) - # xtest.retain_grad() - mu = self.mean_std(xtest)[0] - # mu.backward(retain_graph=True) - - # nabla_mu = xtest.grad - nabla_mu = grad(mu, xtest, create_graph=True)[0][0] - - if hessian == False: - return nabla_mu - else: - for i in range(self.d): - hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0] - return [nabla_mu, hessian_mu] - - """ + def mean_gradient_hessian(self, xtest, hessian=False): + hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) + xtest.requires_grad_(True) + # xtest.retain_grad() + mu = self.mean_std(xtest)[0] + # mu.backward(retain_graph=True) + + # nabla_mu = xtest.grad + nabla_mu = grad(mu, xtest, create_graph=True)[0][0] + + if hessian == False: + return nabla_mu + else: + for i in range(self.d): + hessian_mu[i, :] = grad( + nabla_mu[i], xtest, create_graph=True, retain_graph=True + )[0][0] + return [nabla_mu, hessian_mu] + + """ Optimization """ - def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"): - - # precompute important (theta) - theta_mean, K = self.theta_mean(var=True) - - if lcb == False: - fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ 
theta_mean + \ - beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed( - torch.from_numpy(x).view(1, -1)).T)).detach().numpy()[0] - else: - fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean - \ - beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed( - torch.from_numpy(x).view(1, -1)).T).detach().numpy()[0]).numpy()[0] - - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) - else: - mybounds = self.bounds - - results = [] - for j in range(multistart): - - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - if minimizer == "L-BFGS-B": - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - else: - raise AssertionError("Wrong optimizer selected.") - - results.append([solution, -fun(solution)]) - - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - return (torch.from_numpy(solution).view(1, -1), -torch.from_numpy(fun(solution))) - - def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0): - ''' - Sample functions from Gaussian Process and take Maximum using - first order maximization - ''' - - # sample linear approximating - theta = self.sample_theta() - - # get bounds - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) - else: - mybounds = self.bounds - - fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy()[0] - - results = [] - for j in range(multistart): - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - if minimizer == "L-BFGS-B": - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - else: - raise AssertionError("Wrong optimizer selected.") - - results.append([solution, -fun(solution)]) - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - - return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) - - def sample(self, xtest, size=1, prior=False): - ''' - Sample functions from Gaussian Process - ''' - theta = self.sample_theta(size=size, prior=prior) - f = torch.mm(self.embed(xtest), theta) - return f - - def sample_and_max(self, xtest, size=1): - ''' - Sample functions from Gaussian Process and take Maximum - ''' - f = self.sample(xtest, size=size) - index = np.argmax(f, axis=0) - return (xtest[index, :], f[index, :]) - - def get_kernel(self): - embeding = self.embed(self.x) - Z_ = self.linear_kernel(embeding, embeding) - K = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.n), dtype=torch.float64)) - return K - - def residuals(self): - mu, _ = self.mean_std(self.x) - out = torch.sum((mu - self.y) ** 2) - return out -if __name__ == "__main__": - N = 10 - s = 0.1 - n = 256 - L_infinity_ball = 0.5 - - d = 1 - m = 128 + def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"): + + # precompute important (theta) + theta_mean, K = self.theta_mean(var=True) + + if lcb == False: + fun = ( + lambda x: -( + self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + + beta + * torch.sqrt( + self.embed(torch.from_numpy(x).view(1, -1)) + @ K + @ self.embed(torch.from_numpy(x).view(1, -1)).T + ) + ) + .detach() + .numpy()[0] + ) + else: + fun = lambda x: -( + 
self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + - beta + * torch.sqrt( + self.embed(torch.from_numpy(x).view(1, -1)) + @ K + @ self.embed(torch.from_numpy(x).view(1, -1)).T + ) + .detach() + .numpy()[0] + ).numpy()[0] + + if self.bounds == None: + mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) + else: + mybounds = self.bounds + + results = [] + for j in range(multistart): + + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + if minimizer == "L-BFGS-B": + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + else: + raise AssertionError("Wrong optimizer selected.") + + results.append([solution, -fun(solution)]) + + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + return ( + torch.from_numpy(solution).view(1, -1), + -torch.from_numpy(fun(solution)), + ) + + def sample_and_optimize( + self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0 + ): + """ + Sample functions from Gaussian Process and take Maximum using + first order maximization + """ + + # sample linear approximating + theta = self.sample_theta() + + # get bounds + if self.bounds == None: + mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) + else: + mybounds = self.bounds + + fun = lambda x: -torch.mm( + torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1))) + ).numpy()[0] + + results = [] + for j in range(multistart): + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + if minimizer == "L-BFGS-B": + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + else: + raise AssertionError("Wrong optimizer selected.") + + results.append([solution, -fun(solution)]) + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + + return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) + + def sample(self, xtest, size=1, prior=False): + """ + Sample functions from Gaussian Process + """ + theta = self.sample_theta(size=size, prior=prior) + f = torch.mm(self.embed(xtest), theta) + return f + + def sample_and_max(self, xtest, size=1): + """ + Sample functions from Gaussian Process and take Maximum + """ + f = self.sample(xtest, size=size) + index = np.argmax(f, axis=0) + return (xtest[index, :], f[index, :]) + + def get_kernel(self): + embeding = self.embed(self.x) + Z_ = self.linear_kernel(embeding, embeding) + K = Z_ + self.s * self.s * self.lam * torch.eye( + int(self.n), dtype=torch.float64 + ) + return K + + def residuals(self): + mu, _ = self.mean_std(self.x) + out = torch.sum((mu - self.y) ** 2) + return out - xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball)) - x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view(-1, 1) - F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1 - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - - emb = RFFEmbedding(m=m, gamma=0.1) - Reggr = KernelizedFeatures(embedding=emb, m=m, d=1) - Reggr.fit_gp(x, y) - Reggr.visualize(xtest, f_true=F_true) +if __name__ == "__main__": + N = 10 + s = 0.1 + n = 256 + L_infinity_ball = 0.5 + + d = 1 + m = 128 + + xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball)) + x = torch.from_numpy(np.random.uniform(-L_infinity_ball, 
L_infinity_ball, N)).view( + -1, 1 + ) + + F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1 + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) + + emb = RFFEmbedding(m=m, gamma=0.1) + Reggr = KernelizedFeatures(embedding=emb, m=m, d=1) + Reggr.fit_gp(x, y) + Reggr.visualize(xtest, f_true=F_true) diff --git a/stpy/continuous_processes/kernelized_features_old.py b/stpy/continuous_processes/kernelized_features_old.py index 32877f8..62f802c 100755 --- a/stpy/continuous_processes/kernelized_features_old.py +++ b/stpy/continuous_processes/kernelized_features_old.py @@ -10,649 +10,757 @@ class KernelizedFeatures(GaussianProcess): - ''' - Random Fourier Features for Gaussian Kernel - ''' - - def __init__(self, embedding, m, s=0.001, lam=1., d=1, diameter=1.0, verbose=True, groups=None, - bounds=None, scale=1.0, kappa=1.0, poly=2, primal=True, beta_fun = None ): - - self.s = s - self.lam = lam - self.primal = primal - self.x = None - - self.K = 0 - self.mu = 0.0 - - self.m = torch.from_numpy(np.array(m)) - self.fitted = False - self.data = False - - self.d = d - self.n = 0 - self.bounds = bounds - self.groups = groups - self.diameter = diameter - - self.verbose = verbose - self.admits_first_order = True - - self.embedding = embedding - self.embedding_map = embedding - - self.kappa = kappa - self.scale = scale - self.poly = poly - - self.to_add = [] - self.prior_mean = 0 - self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel - self.dual = False - - def beta(self, delta=1e-2, norm=1, theory=False, variance_only=False): - if not theory: - beta_value = 2. - else: - embeding = self.embed(self.x) - n = self.x.size()[0] - Z_ = self.linear_kernel(embeding, embeding) - K = (Z_ + self.lam * torch.eye(int(self.n), dtype=torch.float64)) - if not variance_only: - beta_value = norm * np.sqrt(self.lam) + self.s * np.sqrt( - torch.logdet(K) - n * np.log(self.lam) + 2. * np.log(1. / delta)) - else: - beta_value = self.s * np.sqrt(torch.logdet(K) - n * np.log(self.lam) + 2. * np.log(1. 
/ delta)) - return beta_value - - def description(self): - return "Custom Features object" - - def norm(self): - if self.fitted: - norm = torch.linalg.norm(self.theta_mean()) - return norm - else: - return None - - def embed(self, x): - return self.embedding.embed(x) - - def set_embedding(self, embed): - self.embedding_map = embed - - def get_basis_size(self): - return int(torch.sum(self.m)) - - def set_basis_size(self, m): - self.m = m - - def kernel(self, x, y): - embedding = self.embed(x) - embedding2 = self.embed(y) - K = self.linear_kernel(embedding, embedding2) - return K - - def logdet_ratio(self): - I = torch.eye(int(torch.sum(self.m))).double() - return torch.logdet(self.K) - torch.logdet(self.s ** 2 * self.lam * I) - - def effective_dim(self, xtest): - Phi = self.embed(xtest) - d = torch.trace(torch.solve(Phi.T @ Phi, Phi.T @ Phi + torch.eye(self.get_basis_size()).double() * self.lam)[0]) - return d - - def add_data_point(self, x, y): - if self.n == 0: - self.fit_gp(x, y) - else: - self.to_add.append([x, y]) - self.fitted = False - - def fit(self,x= None, y=None): - self.fit_gp(self.x,self.y) - - def fit_gp_soft(self, x, y, A, b, std=None): - self.fit_gp(x, y) - Q = self.embed(self.x) - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - if std is not None: - P = np.diag(1 / (std ** 2)) - else: - P = np.diag(np.ones(A.shape[0])) - - objective = cp.Minimize( - cp.sum(cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy())) - + self.s ** 2 * cp.quad_form(A @ theta - b, P) + self.lam * self.s ** 2 * cp.sum_squares(theta)) - prob = cp.Problem(objective) - prob.solve(solver=cp.MOSEK, verbose=False) - return torch.from_numpy(theta.value).view(-1, 1) - - def fit_gp_equality_fast(self, x, y, A, b, rcond=1e-2): - self.fit_gp(x, y) - Q = self.embed(self.x) - I = torch.zeros(Q.size()[1]).double() - - V = Q.T @ Q - self.lam * self.s ** 2 * I - e = Q.T @ self.y - - R = torch.from_numpy(orth(A.detach().numpy().T)).T - b = torch.zeros(size=(R.size()[0], 1)).double() - M = torch.vstack([V, R]) - v = torch.vstack([e, b.view(-1, 1)]) - theta = torch.linalg.lstsq(M, v.view(-1))[0].view(-1, 1) - return theta - - def fit_gp_equality(self, x, y, A, b, eps=1e-6, rcond=1e-6): - self.fit_gp(x, y) - Q = self.embed(self.x) - - if eps is not None: - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize( - cp.sum_squares(Q.detach().numpy() @ theta - self.y.view( - -1).detach().numpy()) + self.lam * self.s ** 2 * cp.sum_squares(theta)) - - constraints = [A.detach().numpy() @ theta - b.detach().view(-1).numpy() <= np.ones(A.size()[0]) * eps ** 2] - constraints += [ - A.detach().numpy() @ theta - b.detach().view(-1).numpy() >= -np.ones(A.size()[0]) * eps ** 2] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, verbose=True) - return torch.from_numpy(theta.value).view(-1, 1) - else: - r = torch.linalg.lstsq(A, b)[0] - N = null_space(A.detach().numpy(), rcond=rcond) - theta = cp.Variable(N.shape[1]) - - objective = cp.Minimize( - cp.sum_squares(Q.detach().numpy() @ N @ theta - self.y.view( - -1).detach().numpy()) + self.lam * self.s ** 2 * cp.sum_squares(theta)) - - prob = cp.Problem(objective) - prob.solve(solver=cp.MOSEK, verbose=True) - return torch.from_numpy(N @ theta.value + r.numpy()).view(-1, 1) - - def fit_gp(self, x, y): - ''' - Function to Fit GP - ''' - self.x = x - self.y = y - self.n = list(self.x.size())[0] - self.d = list(self.x.size())[1] - - if self.n < self.m: - self.dual = True - else: - 
self.dual = False - - if self.primal == True: - self.dual = False - - self.data = True - self.fitted = False - return None - - def add_points(self, x, y): - if self.x is not None: - self.x = torch.cat((self.x, x), dim=0) - self.y = torch.cat((self.y, y), dim=0) - else: - self.x = x - self.y = y - - def check_conversion(self): - """ - Convert between dual and primal form - :return: - """ - if self.primal == False: - if self.n == self.m: # convert do d mode - print("Switching mode to primal.") - self.dual = False - - I = torch.eye(int(self.m)).double() - Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) - self.V = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.m), dtype=torch.float64)) - self.invV, _ = torch.solve(I, self.V) - - def get_invV(self): - self.precompute() - - if self.dual: - I = torch.eye(self.m).double() - Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) - self.V = (Z_ + self.s * self.s * self.lam * torch.eye(self.m, dtype=torch.float64)) - self.invV, _ = torch.solve(I, self.V) - return self.invV - else: - return self.invV - - def precompute(self): - if self.fitted == False: - if len(self.to_add) > 0: - # something to add via low rank update - for i in range(len(self.to_add)): - newx = self.to_add[i][0] - newy = self.to_add[i][1] - - # rank one update - emb = self.embed(newx) - - if self.dual: # via Shur complements - newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double() - M = self.invK @ self.Q - c = 1. / ((self.s ** 2 * self.lam + emb @ emb.T) - emb @ self.Q.T @ M @ emb.T) - - newKinv[0:self.n, 0:self.n] = self.invK + c * M @ emb.T @ emb @ M.T - newKinv[0:self.n, self.n] = (- M @ emb.T * c).view(-1) - newKinv[self.n, 0:self.n] = (- emb @ M.T * c).view(-1) - newKinv[self.n, self.n] = c.view(-1) - - self.invK = newKinv - - self.add_points(newx, newy) - self.n = self.n + 1 - self.Q = self.embed(self.x) - - self.invK_V = (1. / self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))) - - else: # via Woodbury - c = 1 + emb @ self.invV @ emb.T - self.invV = self.invV - (self.invV @ emb.T @ emb @ self.invV) / c - self.add_points(newx, newy) - self.n = self.n + 1 - self.Q = self.embed(self.x) - # add point - - self.check_conversion() - - self.fitted = True - self.to_add = [] - - - elif self.data == True: # just compute the - self.Q = self.embed(self.x) - if not self.dual: - I = torch.eye(int(self.m)).double() - Z_ = self.Q.T @ self.Q - self.V = Z_ + self.s ** 2 * self.lam * I - self.invV = torch.pinverse(self.V, rcond=1e-10) - else: - I = torch.eye(self.n).double() - Z_ = self.Q @ self.Q.T - self.K = Z_ + self.s * self.s * self.lam * I - # self.invK, _ = torch.solve(I, self.K) - self.invK = torch.pinverse(self.K) - self.invK_V = (1. 
/ self.lam) * (-self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m))) - self.fitted = True - else: - I = torch.eye(int(self.m)).double() - self.V = self.s ** 2 * self.lam * I - self.invV = torch.pinverse(self.V, rcond=1e-10) - else: - pass - - def theta_mean(self, var=False, prior=False): - - self.precompute() - if self.fitted == True and prior == False: - if self.dual: - theta_mean = self.Q.T @ self.invK @ self.y - Z = self.invK_V - else: - theta_mean = self.invV @ self.Q.T @ self.y - Z = self.s ** 2 * self.invV - else: - theta_mean = 0 * torch.ones(size=(self.m, 1)).double() - - if var is False: - return theta_mean - else: - return (theta_mean, Z) - - def mean_std(self, xtest): - ''' - Calculate mean and variance for GP at xtest points - ''' - # self.precompute() - embeding = self.embed(xtest) - - # mean - theta_mean = self.theta_mean() - ymean = embeding @ theta_mean - - # std - if not self.dual: - diagonal = self.s ** 2 * torch.einsum('ij,jk,ik->i', (embeding, self.invV, embeding)).view(-1, 1) - else: - diagonal = torch.einsum('ij,jk,ik->i', (embeding, self.invK_V, embeding)).view(-1, 1) - - ystd = torch.sqrt(diagonal) - return (ymean, ystd) - - def sample_matheron(self, xtest, kernel_object, size=1): - basis = self.get_basis_size() - zeros = torch.zeros(size=(basis, size), dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.) - - Z = self.lam * torch.eye(basis, dtype=torch.float64) - L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() - theta = torch.mm(L, random_vector) + self.prior_mean - - f_prior_xtest = torch.mm(self.embed(xtest), theta) - f_prior_x = torch.mm(self.embed(self.x), theta) - - K_star = kernel_object.kernel(self.x, xtest) - N = self.x.size()[0] - K = kernel_object.kernel(self.x, self.x) + self.s ** 2 * self.lam * torch.eye(N) - - f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x) - return f - - def sample_theta(self, size=1, prior=False): - - basis = self.get_basis_size() - - zeros = torch.zeros(size=(basis, size), dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.).double() - self.precompute() - - if self.fitted == True and prior == False: - self.L = torch.linalg.cholesky(self.get_invV()) * self.s - theta = self.theta_mean().view(-1, 1) - print(theta.size()) - print(self.L.size()) - print(random_vector.size()) - theta = theta + torch.mm(self.L, random_vector) - else: - Z = (self.lam) * torch.eye(basis, dtype=torch.float64) - L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() - theta = torch.mm(L, random_vector) + self.prior_mean - - return theta - - def theta_mean_constrained(self, weights=None, B=1): - if weights is None: - weights = torch.ones(self.n).double() / self.n - - Q = self.embed(self.x) - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize( - cp.sum(weights @ cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()))) - zero = np.zeros(int(torch.sum(self.m))) - constraints = [cp.SOC(theta @ zero + B, theta)] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value).view(-1, 1) - - def theta_absolute_deviation(self, weights=None, reg=None): - if weights is None: - weights = torch.ones(self.x.size()[0]) - - if reg is None: # standard regularization - Q = self.embed(self.x) - theta = cp.Variable((int(torch.sum(self.m)), 1)) - objective = cp.Minimize( - cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + self.s * self.lam * 
cp.norm2(theta)) - prob = cp.Problem(objective) - prob.solve() - return torch.from_numpy(theta.value) - else: # custom regularization - Q = self.embed(self.x) - theta = cp.Variable((int(torch.sum(self.m)), 1)) - objective = cp.Minimize( - cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + reg * cp.norm2(theta)) - prob = cp.Problem(objective) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value) - - def theta_absolute_deviation_constrained(self, weights=None, B=1): - if weights is None: - weights = torch.ones(self.x.size()[0]) - Q = self.embed(self.x) - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - - objective = cp.Minimize(cp.sum(weights @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()))) - zero = np.zeros(int(torch.sum(self.m))) - constraints = [cp.SOC(theta @ zero + B, theta)] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - return torch.from_numpy(theta.value).view(-1, 1) - - def theta_chebyschev_approximation(self, eps=1.): - Q = self.embed(self.x).detach().numpy() - y = self.y.view(-1).detach().numpy() - - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize(cp.sum_squares(theta)) - constraints = [cp.abs(Q @ theta - y) <= eps] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK) - res = torch.from_numpy(theta.value).view(-1, 1) - return res - - def interpolation(self, eps=0.): - Q = self.embed(self.x).detach().numpy() - y = self.y.view(-1).detach().numpy() - theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) - objective = cp.Minimize(cp.sum_squares(theta)) - constraints = [Q @ theta == y] - - prob = cp.Problem(objective, constraints) - prob.solve() - res = torch.from_numpy(theta.value).view(-1, 1) - - return res - - def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None): - embeding = self.embed(xtest) - - if B is not None: - theta_mean = self.theta_mean_constrained(weights=weights, B=B) - else: - theta_mean = self.theta_mean(weights=weights, reg=reg) - ymean = torch.mm(embeding, theta_mean) - if theta == True: - return ymean, theta_mean - else: - return ymean - - def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False): - embeding = self.embed(xtest) - if B is not None: - theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B) - else: - theta_mean = self.theta_absolute_deviation(weights=weights) - ymean = torch.mm(embeding, theta_mean) - if theta == True: - return ymean, theta_mean - else: - return ymean - - """ + """ + Random Fourier Features for Gaussian Kernel + """ + + def __init__( + self, + embedding, + m, + s=0.001, + lam=1.0, + d=1, + diameter=1.0, + verbose=True, + groups=None, + bounds=None, + scale=1.0, + kappa=1.0, + poly=2, + primal=True, + beta_fun=None, + ): + + self.s = s + self.lam = lam + self.primal = primal + self.x = None + + self.K = 0 + self.mu = 0.0 + + self.m = torch.from_numpy(np.array(m)) + self.fitted = False + self.data = False + + self.d = d + self.n = 0 + self.bounds = bounds + self.groups = groups + self.diameter = diameter + + self.verbose = verbose + self.admits_first_order = True + + self.embedding = embedding + self.embedding_map = embedding + + self.kappa = kappa + self.scale = scale + self.poly = poly + + self.to_add = [] + self.prior_mean = 0 + self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel + self.dual = False + + def beta(self, delta=1e-2, norm=1, theory=False, 
variance_only=False): + if not theory: + beta_value = 2.0 + else: + embeding = self.embed(self.x) + n = self.x.size()[0] + Z_ = self.linear_kernel(embeding, embeding) + K = Z_ + self.lam * torch.eye(int(self.n), dtype=torch.float64) + if not variance_only: + beta_value = norm * np.sqrt(self.lam) + self.s * np.sqrt( + torch.logdet(K) - n * np.log(self.lam) + 2.0 * np.log(1.0 / delta) + ) + else: + beta_value = self.s * np.sqrt( + torch.logdet(K) - n * np.log(self.lam) + 2.0 * np.log(1.0 / delta) + ) + return beta_value + + def description(self): + return "Custom Features object" + + def norm(self): + if self.fitted: + norm = torch.linalg.norm(self.theta_mean()) + return norm + else: + return None + + def embed(self, x): + return self.embedding.embed(x) + + def set_embedding(self, embed): + self.embedding_map = embed + + def get_basis_size(self): + return int(torch.sum(self.m)) + + def set_basis_size(self, m): + self.m = m + + def kernel(self, x, y): + embedding = self.embed(x) + embedding2 = self.embed(y) + K = self.linear_kernel(embedding, embedding2) + return K + + def logdet_ratio(self): + I = torch.eye(int(torch.sum(self.m))).double() + return torch.logdet(self.K) - torch.logdet(self.s**2 * self.lam * I) + + def effective_dim(self, xtest): + Phi = self.embed(xtest) + d = torch.trace( + torch.solve( + Phi.T @ Phi, + Phi.T @ Phi + torch.eye(self.get_basis_size()).double() * self.lam, + )[0] + ) + return d + + def add_data_point(self, x, y): + if self.n == 0: + self.fit_gp(x, y) + else: + self.to_add.append([x, y]) + self.fitted = False + + def fit(self, x=None, y=None): + self.fit_gp(self.x, self.y) + + def fit_gp_soft(self, x, y, A, b, std=None): + self.fit_gp(x, y) + Q = self.embed(self.x) + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + if std is not None: + P = np.diag(1 / (std**2)) + else: + P = np.diag(np.ones(A.shape[0])) + + objective = cp.Minimize( + cp.sum( + cp.square(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()) + ) + + self.s**2 * cp.quad_form(A @ theta - b, P) + + self.lam * self.s**2 * cp.sum_squares(theta) + ) + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK, verbose=False) + return torch.from_numpy(theta.value).view(-1, 1) + + def fit_gp_equality_fast(self, x, y, A, b, rcond=1e-2): + self.fit_gp(x, y) + Q = self.embed(self.x) + I = torch.zeros(Q.size()[1]).double() + + V = Q.T @ Q - self.lam * self.s**2 * I + e = Q.T @ self.y + + R = torch.from_numpy(orth(A.detach().numpy().T)).T + b = torch.zeros(size=(R.size()[0], 1)).double() + M = torch.vstack([V, R]) + v = torch.vstack([e, b.view(-1, 1)]) + theta = torch.linalg.lstsq(M, v.view(-1))[0].view(-1, 1) + return theta + + def fit_gp_equality(self, x, y, A, b, eps=1e-6, rcond=1e-6): + self.fit_gp(x, y) + Q = self.embed(self.x) + + if eps is not None: + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize( + cp.sum_squares( + Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy() + ) + + self.lam * self.s**2 * cp.sum_squares(theta) + ) + + constraints = [ + A.detach().numpy() @ theta - b.detach().view(-1).numpy() + <= np.ones(A.size()[0]) * eps**2 + ] + constraints += [ + A.detach().numpy() @ theta - b.detach().view(-1).numpy() + >= -np.ones(A.size()[0]) * eps**2 + ] + + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, verbose=True) + return torch.from_numpy(theta.value).view(-1, 1) + else: + r = torch.linalg.lstsq(A, b)[0] + N = null_space(A.detach().numpy(), rcond=rcond) + theta = 
cp.Variable(N.shape[1]) + + objective = cp.Minimize( + cp.sum_squares( + Q.detach().numpy() @ N @ theta - self.y.view(-1).detach().numpy() + ) + + self.lam * self.s**2 * cp.sum_squares(theta) + ) + + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK, verbose=True) + return torch.from_numpy(N @ theta.value + r.numpy()).view(-1, 1) + + def fit_gp(self, x, y): + """ + Function to Fit GP + """ + self.x = x + self.y = y + self.n = list(self.x.size())[0] + self.d = list(self.x.size())[1] + + if self.n < self.m: + self.dual = True + else: + self.dual = False + + if self.primal == True: + self.dual = False + + self.data = True + self.fitted = False + return None + + def add_points(self, x, y): + if self.x is not None: + self.x = torch.cat((self.x, x), dim=0) + self.y = torch.cat((self.y, y), dim=0) + else: + self.x = x + self.y = y + + def check_conversion(self): + """ + Convert between dual and primal form + :return: + """ + if self.primal == False: + if self.n == self.m: # convert do d mode + print("Switching mode to primal.") + self.dual = False + + I = torch.eye(int(self.m)).double() + Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) + self.V = Z_ + self.s * self.s * self.lam * torch.eye( + int(self.m), dtype=torch.float64 + ) + self.invV, _ = torch.solve(I, self.V) + + def get_invV(self): + self.precompute() + + if self.dual: + I = torch.eye(self.m).double() + Z_ = self.linear_kernel(torch.t(self.Q), torch.t(self.Q)) + self.V = Z_ + self.s * self.s * self.lam * torch.eye( + self.m, dtype=torch.float64 + ) + self.invV, _ = torch.solve(I, self.V) + return self.invV + else: + return self.invV + + def precompute(self): + if self.fitted == False: + if len(self.to_add) > 0: + # something to add via low rank update + for i in range(len(self.to_add)): + newx = self.to_add[i][0] + newy = self.to_add[i][1] + + # rank one update + emb = self.embed(newx) + + if self.dual: # via Shur complements + newKinv = torch.zeros(size=(self.n + 1, self.n + 1)).double() + M = self.invK @ self.Q + c = 1.0 / ( + (self.s**2 * self.lam + emb @ emb.T) + - emb @ self.Q.T @ M @ emb.T + ) + + newKinv[0 : self.n, 0 : self.n] = ( + self.invK + c * M @ emb.T @ emb @ M.T + ) + newKinv[0 : self.n, self.n] = (-M @ emb.T * c).view(-1) + newKinv[self.n, 0 : self.n] = (-emb @ M.T * c).view(-1) + newKinv[self.n, self.n] = c.view(-1) + + self.invK = newKinv + + self.add_points(newx, newy) + self.n = self.n + 1 + self.Q = self.embed(self.x) + + self.invK_V = (1.0 / self.lam) * ( + -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)) + ) + + else: # via Woodbury + c = 1 + emb @ self.invV @ emb.T + self.invV = ( + self.invV - (self.invV @ emb.T @ emb @ self.invV) / c + ) + self.add_points(newx, newy) + self.n = self.n + 1 + self.Q = self.embed(self.x) + # add point + + self.check_conversion() + + self.fitted = True + self.to_add = [] + + elif self.data == True: # just compute the + self.Q = self.embed(self.x) + if not self.dual: + I = torch.eye(int(self.m)).double() + Z_ = self.Q.T @ self.Q + self.V = Z_ + self.s**2 * self.lam * I + self.invV = torch.pinverse(self.V, rcond=1e-10) + else: + I = torch.eye(self.n).double() + Z_ = self.Q @ self.Q.T + self.K = Z_ + self.s * self.s * self.lam * I + # self.invK, _ = torch.solve(I, self.K) + self.invK = torch.pinverse(self.K) + self.invK_V = (1.0 / self.lam) * ( + -self.Q.T @ self.invK @ self.Q + torch.eye(int(self.m)) + ) + self.fitted = True + else: + I = torch.eye(int(self.m)).double() + self.V = self.s**2 * self.lam * I + self.invV = torch.pinverse(self.V, rcond=1e-10) + 
else: + pass + + def theta_mean(self, var=False, prior=False): + + self.precompute() + if self.fitted == True and prior == False: + if self.dual: + theta_mean = self.Q.T @ self.invK @ self.y + Z = self.invK_V + else: + theta_mean = self.invV @ self.Q.T @ self.y + Z = self.s**2 * self.invV + else: + theta_mean = 0 * torch.ones(size=(self.m, 1)).double() + + if var is False: + return theta_mean + else: + return (theta_mean, Z) + + def mean_std(self, xtest): + """ + Calculate mean and variance for GP at xtest points + """ + # self.precompute() + embeding = self.embed(xtest) + + # mean + theta_mean = self.theta_mean() + ymean = embeding @ theta_mean + + # std + if not self.dual: + diagonal = self.s**2 * torch.einsum( + "ij,jk,ik->i", (embeding, self.invV, embeding) + ).view(-1, 1) + else: + diagonal = torch.einsum( + "ij,jk,ik->i", (embeding, self.invK_V, embeding) + ).view(-1, 1) + + ystd = torch.sqrt(diagonal) + return (ymean, ystd) + + def sample_matheron(self, xtest, kernel_object, size=1): + basis = self.get_basis_size() + zeros = torch.zeros(size=(basis, size), dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0) + + Z = self.lam * torch.eye(basis, dtype=torch.float64) + L = torch.linalg.cholesky(Z.transpose(-2, -1).conj()).transpose(-2, -1).conj() + theta = torch.mm(L, random_vector) + self.prior_mean + + f_prior_xtest = torch.mm(self.embed(xtest), theta) + f_prior_x = torch.mm(self.embed(self.x), theta) + + K_star = kernel_object.kernel(self.x, xtest) + N = self.x.size()[0] + K = kernel_object.kernel(self.x, self.x) + self.s**2 * self.lam * torch.eye(N) + + f = f_prior_xtest + K_star @ torch.pinverse(K) @ (self.y - f_prior_x) + return f + + def sample_theta(self, size=1, prior=False): + + basis = self.get_basis_size() + + zeros = torch.zeros(size=(basis, size), dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0).double() + self.precompute() + + if self.fitted == True and prior == False: + self.L = torch.linalg.cholesky(self.get_invV()) * self.s + theta = self.theta_mean().view(-1, 1) + print(theta.size()) + print(self.L.size()) + print(random_vector.size()) + theta = theta + torch.mm(self.L, random_vector) + else: + Z = (self.lam) * torch.eye(basis, dtype=torch.float64) + L = ( + torch.linalg.cholesky(Z.transpose(-2, -1).conj()) + .transpose(-2, -1) + .conj() + ) + theta = torch.mm(L, random_vector) + self.prior_mean + + return theta + + def theta_mean_constrained(self, weights=None, B=1): + if weights is None: + weights = torch.ones(self.n).double() / self.n + + Q = self.embed(self.x) + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize( + cp.sum( + weights + @ cp.square( + Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy() + ) + ) + ) + zero = np.zeros(int(torch.sum(self.m))) + constraints = [cp.SOC(theta @ zero + B, theta)] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value).view(-1, 1) + + def theta_absolute_deviation(self, weights=None, reg=None): + if weights is None: + weights = torch.ones(self.x.size()[0]) + + if reg is None: # standard regularization + Q = self.embed(self.x) + theta = cp.Variable((int(torch.sum(self.m)), 1)) + objective = cp.Minimize( + cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + + self.s * self.lam * cp.norm2(theta) + ) + prob = cp.Problem(objective) + prob.solve() + return torch.from_numpy(theta.value) + else: # custom regularization + Q = self.embed(self.x) + theta = 
cp.Variable((int(torch.sum(self.m)), 1)) + objective = cp.Minimize( + cp.sum(weights @ cp.abs(Q.numpy() @ theta - self.y.numpy())) + + reg * cp.norm2(theta) + ) + prob = cp.Problem(objective) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value) + + def theta_absolute_deviation_constrained(self, weights=None, B=1): + if weights is None: + weights = torch.ones(self.x.size()[0]) + Q = self.embed(self.x) + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + + objective = cp.Minimize( + cp.sum( + weights + @ cp.abs(Q.detach().numpy() @ theta - self.y.view(-1).detach().numpy()) + ) + ) + zero = np.zeros(int(torch.sum(self.m))) + constraints = [cp.SOC(theta @ zero + B, theta)] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + return torch.from_numpy(theta.value).view(-1, 1) + + def theta_chebyschev_approximation(self, eps=1.0): + Q = self.embed(self.x).detach().numpy() + y = self.y.view(-1).detach().numpy() + + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize(cp.sum_squares(theta)) + constraints = [cp.abs(Q @ theta - y) <= eps] + + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK) + res = torch.from_numpy(theta.value).view(-1, 1) + return res + + def interpolation(self, eps=0.0): + Q = self.embed(self.x).detach().numpy() + y = self.y.view(-1).detach().numpy() + theta = cp.Variable(int(torch.sum(self.m).detach().view(-1).numpy())) + objective = cp.Minimize(cp.sum_squares(theta)) + constraints = [Q @ theta == y] + + prob = cp.Problem(objective, constraints) + prob.solve() + res = torch.from_numpy(theta.value).view(-1, 1) + + return res + + def mean_squared(self, xtest, weights=None, B=None, theta=False, reg=None): + embeding = self.embed(xtest) + + if B is not None: + theta_mean = self.theta_mean_constrained(weights=weights, B=B) + else: + theta_mean = self.theta_mean(weights=weights, reg=reg) + ymean = torch.mm(embeding, theta_mean) + if theta == True: + return ymean, theta_mean + else: + return ymean + + def mean_aboslute_deviation(self, xtest, weights=None, B=None, theta=False): + embeding = self.embed(xtest) + if B is not None: + theta_mean = self.theta_absolute_deviation_constrained(weights=weights, B=B) + else: + theta_mean = self.theta_absolute_deviation(weights=weights) + ymean = torch.mm(embeding, theta_mean) + if theta == True: + return ymean, theta_mean + else: + return ymean + + """ Hessian """ - def mean_gradient_hessian(self, xtest, hessian=False): - hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) - xtest.requires_grad_(True) + def mean_gradient_hessian(self, xtest, hessian=False): + hessian_mu = torch.zeros(size=(self.d, self.d), dtype=torch.float64) + xtest.requires_grad_(True) - # xtest.retain_grad() - mu = self.mean_std(xtest)[0] - # mu.backward(retain_graph=True) + # xtest.retain_grad() + mu = self.mean_std(xtest)[0] + # mu.backward(retain_graph=True) - # nabla_mu = xtest.grad - nabla_mu = grad(mu, xtest, create_graph=True)[0][0] + # nabla_mu = xtest.grad + nabla_mu = grad(mu, xtest, create_graph=True)[0][0] - if hessian == False: - return nabla_mu - else: - for i in range(self.d): - hessian_mu[i, :] = grad(nabla_mu[i], xtest, create_graph=True, retain_graph=True)[0][0] - return [nabla_mu, hessian_mu] + if hessian == False: + return nabla_mu + else: + for i in range(self.d): + hessian_mu[i, :] = grad( + nabla_mu[i], xtest, create_graph=True, retain_graph=True + )[0][0] + return [nabla_mu, hessian_mu] - """ + """ Optimization """ - 
def ucb(self, xtest, beta = lambda :2., bound = None, lcb = False): - - if bound is not None: - mu, V = self.theta_mean(var = True) - mu = mu.T - Phi = self.embed(xtest) - ucb = torch.zeros(size = (xtest.size()[0],1)).double() - - theta = cp.Variable(self.get_basis_size()) - for i in range(xtest.size()[0]): - phi = Phi[i,:] - if lcb: - objective = cp.Minimize(phi @ theta) - else: - objective = cp.Maximize(phi @ theta) - - constraints = [] - constraints += [cp.quad_form(mu.view(-1)-theta,V) <= beta()] - constraints += [cp.sum_squares(theta) <= bound] - prob = cp.Problem(objective, constraints) - prob.solve() - ucb[i,0] = prob.value - return ucb - else: - mu, sigma = self.mean_std(xtest) - if lcb: - return mu - beta()*sigma - else: - return mu + beta() * sigma - - - def lcb(self, xtest, beta = lambda :2, bound = None): - return self.ucb(xtest, beta = beta, bound = bound, lcb = True) - - - def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"): - - # precompute important (theta) - theta_mean, K = self.theta_mean(var=True) - - if lcb == False: - fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + \ - beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed( - torch.from_numpy(x).view(1, -1)).T)).detach().numpy()[0] - else: - fun = lambda x: - (self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean - \ - beta * torch.sqrt(self.embed(torch.from_numpy(x).view(1, -1)) @ K @ self.embed( - torch.from_numpy(x).view(1, -1)).T).detach().numpy()[0]).numpy()[0] - - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) - else: - mybounds = self.bounds - - results = [] - for j in range(multistart): - - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - if minimizer == "L-BFGS-B": - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - else: - raise AssertionError("Wrong optimizer selected.") - - results.append([solution, -fun(solution)]) - - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - return (torch.from_numpy(solution).view(1, -1), -torch.from_numpy(fun(solution))) - - def sample_and_optimize(self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0): - ''' - Sample functions from Gaussian Process and take Maximum using - first order maximization - ''' - - # sample linear approximating - theta = self.sample_theta() - - # get bounds - if self.bounds == None: - mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) - else: - mybounds = self.bounds - - fun = lambda x: -torch.mm(torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1)))).numpy()[0] - - results = [] - for j in range(multistart): - x0 = np.random.randn(self.d) - for i in range(self.d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - - if minimizer == "L-BFGS-B": - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds) - solution = res.x - else: - raise AssertionError("Wrong optimizer selected.") - - results.append([solution, -fun(solution)]) - results = np.array(results) - index = np.argmax(results[:, 1]) - solution = results[index, 0] - - return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) - - def sample(self, xtest, size=1, prior=False): - ''' - Sample functions from Gaussian Process - ''' - theta = self.sample_theta(size=size, prior=prior) - f = 
torch.mm(self.embed(xtest), theta) - return f - - def sample_and_max(self, xtest, size=1): - ''' - Sample functions from Gaussian Process and take Maximum - ''' - f = self.sample(xtest, size=size) - index = np.argmax(f, axis=0) - return (xtest[index, :], f[index, :]) - - def get_kernel(self): - embeding = self.embed(self.x) - Z_ = self.linear_kernel(embeding, embeding) - K = (Z_ + self.s * self.s * self.lam * torch.eye(int(self.n), dtype=torch.float64)) - return K - - def residuals(self): - mu, _ = self.mean_std(self.x) - out = torch.sum((mu - self.y) ** 2) - return out + def ucb(self, xtest, beta=lambda: 2.0, bound=None, lcb=False): + + if bound is not None: + mu, V = self.theta_mean(var=True) + mu = mu.T + Phi = self.embed(xtest) + ucb = torch.zeros(size=(xtest.size()[0], 1)).double() + + theta = cp.Variable(self.get_basis_size()) + for i in range(xtest.size()[0]): + phi = Phi[i, :] + if lcb: + objective = cp.Minimize(phi @ theta) + else: + objective = cp.Maximize(phi @ theta) + + constraints = [] + constraints += [cp.quad_form(mu.view(-1) - theta, V) <= beta()] + constraints += [cp.sum_squares(theta) <= bound] + prob = cp.Problem(objective, constraints) + prob.solve() + ucb[i, 0] = prob.value + return ucb + else: + mu, sigma = self.mean_std(xtest) + if lcb: + return mu - beta() * sigma + else: + return mu + beta() * sigma + + def lcb(self, xtest, beta=lambda: 2, bound=None): + return self.ucb(xtest, beta=beta, bound=bound, lcb=True) + + def ucb_optimize(self, beta, multistart=25, lcb=False, minimizer="L-BFGS-B"): + + # precompute important (theta) + theta_mean, K = self.theta_mean(var=True) + + if lcb == False: + fun = ( + lambda x: -( + self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + + beta + * torch.sqrt( + self.embed(torch.from_numpy(x).view(1, -1)) + @ K + @ self.embed(torch.from_numpy(x).view(1, -1)).T + ) + ) + .detach() + .numpy()[0] + ) + else: + fun = lambda x: -( + self.embed(torch.from_numpy(x).view(1, -1)) @ theta_mean + - beta + * torch.sqrt( + self.embed(torch.from_numpy(x).view(1, -1)) + @ K + @ self.embed(torch.from_numpy(x).view(1, -1)).T + ) + .detach() + .numpy()[0] + ).numpy()[0] + + if self.bounds == None: + mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) + else: + mybounds = self.bounds + + results = [] + for j in range(multistart): + + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + + if minimizer == "L-BFGS-B": + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + else: + raise AssertionError("Wrong optimizer selected.") + + results.append([solution, -fun(solution)]) + + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + return ( + torch.from_numpy(solution).view(1, -1), + -torch.from_numpy(fun(solution)), + ) + + def sample_and_optimize( + self, xtest=None, multistart=25, minimizer="L-BFGS-B", grid=100, verbose=0 + ): + """ + Sample functions from Gaussian Process and take Maximum using + first order maximization + """ + + # sample linear approximating + theta = self.sample_theta() + + # get bounds + if self.bounds == None: + mybounds = tuple([(-self.diameter, self.diameter) for _ in range(self.d)]) + else: + mybounds = self.bounds + + fun = lambda x: -torch.mm( + torch.t(theta), torch.t(self.embed(torch.from_numpy(x).view(1, -1))) + ).numpy()[0] + + results = [] + for j in range(multistart): + x0 = np.random.randn(self.d) + for i in range(self.d): + x0[i] 
= np.random.uniform(mybounds[i][0], mybounds[i][1]) + + if minimizer == "L-BFGS-B": + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.0001, bounds=mybounds + ) + solution = res.x + else: + raise AssertionError("Wrong optimizer selected.") + + results.append([solution, -fun(solution)]) + results = np.array(results) + index = np.argmax(results[:, 1]) + solution = results[index, 0] + + return (torch.from_numpy(solution), -torch.from_numpy(fun(solution))) + + def sample(self, xtest, size=1, prior=False): + """ + Sample functions from Gaussian Process + """ + theta = self.sample_theta(size=size, prior=prior) + f = torch.mm(self.embed(xtest), theta) + return f + + def sample_and_max(self, xtest, size=1): + """ + Sample functions from Gaussian Process and take Maximum + """ + f = self.sample(xtest, size=size) + index = np.argmax(f, axis=0) + return (xtest[index, :], f[index, :]) + + def get_kernel(self): + embeding = self.embed(self.x) + Z_ = self.linear_kernel(embeding, embeding) + K = Z_ + self.s * self.s * self.lam * torch.eye( + int(self.n), dtype=torch.float64 + ) + return K + + def residuals(self): + mu, _ = self.mean_std(self.x) + out = torch.sum((mu - self.y) ** 2) + return out if __name__ == "__main__": - N = 10 - s = 0.1 - n = 256 - L_infinity_ball = 0.5 - - d = 1 - m = 128 - - xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball)) - x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view(-1, 1) - - F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1 - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - - emb = RFFEmbedding(m=m, gamma=0.1) - Reggr = KernelizedFeatures(embedding=emb, m=m, d=1) - Reggr.fit_gp(x, y) - Reggr.visualize(xtest, f_true=F_true) + N = 10 + s = 0.1 + n = 256 + L_infinity_ball = 0.5 + + d = 1 + m = 128 + + xtest = torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball)) + x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, N)).view( + -1, 1 + ) + + F_true = lambda x: torch.sin(x * 4) ** 2 - 0.1 + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) + + emb = RFFEmbedding(m=m, gamma=0.1) + Reggr = KernelizedFeatures(embedding=emb, m=m, d=1) + Reggr.fit_gp(x, y) + Reggr.visualize(xtest, f_true=F_true) diff --git a/stpy/continuous_processes/mkl_estimator.py b/stpy/continuous_processes/mkl_estimator.py index 59e1ca8..639541d 100755 --- a/stpy/continuous_processes/mkl_estimator.py +++ b/stpy/continuous_processes/mkl_estimator.py @@ -7,213 +7,271 @@ from stpy.regularization.regularizer import Regularizer from stpy.regularization.simplex_regularizer import DirichletRegularizer, SupRegularizer + class MultipleKernelLearner(GaussianProcess): - def __init__(self, kernel_objects, - lam: float =1.0, - s: float = 0.01, - opt: str = 'closed', - regularizer: Regularizer = None): - - self.kernel_objects = kernel_objects - self.no_models = len(kernel_objects) - self.regularizer = regularizer - self.s = s - self.lam = lam - self.opt = opt - - self.var = 'fixed' - - def fit(self): - self.fit_gp(self.x,self.y) - - def fit_gp(self, x, y): - self.x = x - self.y = y - (self.n, self.d) = self.x.size() - - self.Ks = [] - for i in range(self.no_models): - self.Ks.append(self.kernel_objects[i].kernel(x,x)) - - if self.opt == 'sdp': - alpha = cp.Variable(self.no_models) - u = cp.Variable(1) - A = None - for i in range(self.no_models): - if A is None: - A = self.Ks[i] * alpha[i] - else: - A += self.Ks[i] * alpha[i] - A = A + 
np.eye(self.n)*self.lam*self.s**2 - constraints = [] - l = cp.reshape(u, (1, 1)) - G = cp.bmat([[A, y.numpy()], [y.numpy().T, l]]) - constraints += [G >> 0] - constraints += [alpha >= 0.] - constraints += [cp.sum(alpha) == 1.] - - objective = cp.Minimize( u) - prob = cp.Problem(objective, constraints) - prob.solve( solver = cp.MOSEK,verbose = True) - - elif self.opt == "closed": - alpha = cp.Variable(self.no_models, nonneg=True) - A = sum([self.Ks[i] * alpha[i] for i in range(self.no_models)])+ np.eye(self.n) * self.lam * self.s ** 2 - constraints = [cp.sum(alpha)==1, alpha<=1] - objective = cp.matrix_frac(self.y.numpy(), A) - if self.regularizer is not None and self.regularizer.is_convex(): - objective = objective + self.regularizer.get_regularizer_cvxpy()(alpha) - prob = cp.Problem(cp.Minimize(objective), constraints) - prob.solve(solver=cp.MOSEK, verbose=False) - - elif self.regularizer is not None and not self.regularizer.is_convex(): - obj,con,vars = self.regularizer.get_cvxpy_objectives_constraints_variables(self.no_models) - no_problems = len(con) - vals = [] - args = [] - for i in range(no_problems): - prob = cp.Problem(cp.Minimize(objective+obj[i](alpha,*vars)), constraints + con[i](alpha, *vars)) - prob.solve(solver=cp.MOSEK, verbose=False) - vals.append(prob.value) - args.append(alpha.value) - alpha.value = args[np.argmin(vals)] - else: - prob = cp.Problem(cp.Minimize(objective), constraints) - prob.solve(solver=cp.MOSEK, verbose=False) - - self.alphas = torch.from_numpy(alpha.value) - if self.regularizer is not None: - print (self.regularizer.name, self.alphas) - else: - print("No", self.alphas) - self.K = torch.sum(torch.stack([alpha*K for alpha,K in zip(self.alphas, self.Ks)]), dim = 0) + np.eye(self.n)*self.lam*self.s**2 - self.fitted = True - - def execute(self, xtest): - if self.fitted == True: - Ks = [self.kernel_objects[i].kernel(self.x, xtest) for i in range(self.no_models)] - K_star = torch.sum(torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0) - else: - K_star = None - Ks = [self.kernel_objects[i].kernel(xtest, xtest) for i in range(self.no_models)] - K_star_star = torch.sum(torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0) - return (K_star, K_star_star) - - # def log_marginal(self, kernel, X, weight): - # pass - - def mean(self, xtest): - K_star, K_star_star = self.execute(xtest) - self.A = torch.linalg.lstsq(self.K, self.y)[0] - ymean = torch.mm(K_star, self.A) - return ymean - - def mean_std(self, xtest, full=False, reuse=False): - K_star, K_star_star = self.execute(xtest) - self.A = torch.linalg.lstsq(self.K, self.y)[0] - ymean = torch.mm(K_star, self.A) - - if self.var == 'fixed': - ystd = self.std_fixed(xtest) - elif self.var == 'true': - ystd = self.std_opt(xtest) - return (ymean, ystd) - - def lcb(self, xtest: torch.Tensor, type=None, arg=False, sign=1.): - theta = cp.Variable((self.alpha, 1)) - args = [] - n = xtest.size()[0] - values = torch.zeros(size=(n, 1)).double() - Phi = self.embed(xtest) - - for j in range(n): - objective = sign * Phi[j, :] @ theta - if (self.constraints is not None and not self.constraints.is_convex()): - value, theta_lcb = self.objective_on_non_convex_confidence_set(theta, objective, type=type) - elif not self.regularizer.is_convex(): - value, theta_lcb = self.objective_on_non_convex_confidence_set_bisection(theta, objective, - type=type) - else: - value, theta_lcb = self.objective_on_confidence_set(theta, objective, type=type) - - values[j] = sign * value - if arg: - args.append(theta_lcb) - - 
if args: - return values, args - else: - return values - - def ucb(self, xtest): - pass - - def std_opt(self, xtest): - N = xtest.size()[0] - for i in range(N): - x = xtest[i,:] - theta = cp.Variable(self.n*self.no_models) - M = torch.block_diag(self.Ks) - cp.norm(theta,p=2)*theta[i] - - def std_fixed(self, xtest): - K_star, K_star_star = self.execute(xtest) - self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star))) - first = torch.diag(K_star_star).view(-1, 1) - second = torch.einsum('ij,ji->i', (self.B, torch.t(K_star))).view(-1, 1) - variance = first - second - ystd = torch.sqrt(variance) - return ystd - - def sample(self, xtest, size=1): - pass + def __init__( + self, + kernel_objects, + lam: float = 1.0, + s: float = 0.01, + opt: str = "closed", + regularizer: Regularizer = None, + ): + + self.kernel_objects = kernel_objects + self.no_models = len(kernel_objects) + self.regularizer = regularizer + self.s = s + self.lam = lam + self.opt = opt + + self.var = "fixed" + + def fit(self): + self.fit_gp(self.x, self.y) + + def fit_gp(self, x, y): + self.x = x + self.y = y + (self.n, self.d) = self.x.size() + + self.Ks = [] + for i in range(self.no_models): + self.Ks.append(self.kernel_objects[i].kernel(x, x)) + + if self.opt == "sdp": + alpha = cp.Variable(self.no_models) + u = cp.Variable(1) + A = None + for i in range(self.no_models): + if A is None: + A = self.Ks[i] * alpha[i] + else: + A += self.Ks[i] * alpha[i] + A = A + np.eye(self.n) * self.lam * self.s**2 + constraints = [] + l = cp.reshape(u, (1, 1)) + G = cp.bmat([[A, y.numpy()], [y.numpy().T, l]]) + constraints += [G >> 0] + constraints += [alpha >= 0.0] + constraints += [cp.sum(alpha) == 1.0] + + objective = cp.Minimize(u) + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, verbose=True) + + elif self.opt == "closed": + alpha = cp.Variable(self.no_models, nonneg=True) + A = ( + sum([self.Ks[i] * alpha[i] for i in range(self.no_models)]) + + np.eye(self.n) * self.lam * self.s**2 + ) + constraints = [cp.sum(alpha) == 1, alpha <= 1] + objective = cp.matrix_frac(self.y.numpy(), A) + if self.regularizer is not None and self.regularizer.is_convex(): + objective = objective + self.regularizer.get_regularizer_cvxpy()(alpha) + prob = cp.Problem(cp.Minimize(objective), constraints) + prob.solve(solver=cp.MOSEK, verbose=False) + + elif self.regularizer is not None and not self.regularizer.is_convex(): + obj, con, vars = ( + self.regularizer.get_cvxpy_objectives_constraints_variables( + self.no_models + ) + ) + no_problems = len(con) + vals = [] + args = [] + for i in range(no_problems): + prob = cp.Problem( + cp.Minimize(objective + obj[i](alpha, *vars)), + constraints + con[i](alpha, *vars), + ) + prob.solve(solver=cp.MOSEK, verbose=False) + vals.append(prob.value) + args.append(alpha.value) + alpha.value = args[np.argmin(vals)] + else: + prob = cp.Problem(cp.Minimize(objective), constraints) + prob.solve(solver=cp.MOSEK, verbose=False) + + self.alphas = torch.from_numpy(alpha.value) + if self.regularizer is not None: + print(self.regularizer.name, self.alphas) + else: + print("No", self.alphas) + self.K = ( + torch.sum( + torch.stack([alpha * K for alpha, K in zip(self.alphas, self.Ks)]), + dim=0, + ) + + np.eye(self.n) * self.lam * self.s**2 + ) + self.fitted = True + + def execute(self, xtest): + if self.fitted == True: + Ks = [ + self.kernel_objects[i].kernel(self.x, xtest) + for i in range(self.no_models) + ] + K_star = torch.sum( + torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0 + ) + 
else: + K_star = None + Ks = [ + self.kernel_objects[i].kernel(xtest, xtest) for i in range(self.no_models) + ] + K_star_star = torch.sum( + torch.stack([alpha * K for alpha, K in zip(self.alphas, Ks)]), dim=0 + ) + return (K_star, K_star_star) + + # def log_marginal(self, kernel, X, weight): + # pass + + def mean(self, xtest): + K_star, K_star_star = self.execute(xtest) + self.A = torch.linalg.lstsq(self.K, self.y)[0] + ymean = torch.mm(K_star, self.A) + return ymean + + def mean_std(self, xtest, full=False, reuse=False): + K_star, K_star_star = self.execute(xtest) + self.A = torch.linalg.lstsq(self.K, self.y)[0] + ymean = torch.mm(K_star, self.A) + + if self.var == "fixed": + ystd = self.std_fixed(xtest) + elif self.var == "true": + ystd = self.std_opt(xtest) + return (ymean, ystd) + + def lcb(self, xtest: torch.Tensor, type=None, arg=False, sign=1.0): + theta = cp.Variable((self.alpha, 1)) + args = [] + n = xtest.size()[0] + values = torch.zeros(size=(n, 1)).double() + Phi = self.embed(xtest) + + for j in range(n): + objective = sign * Phi[j, :] @ theta + if self.constraints is not None and not self.constraints.is_convex(): + value, theta_lcb = self.objective_on_non_convex_confidence_set( + theta, objective, type=type + ) + elif not self.regularizer.is_convex(): + value, theta_lcb = ( + self.objective_on_non_convex_confidence_set_bisection( + theta, objective, type=type + ) + ) + else: + value, theta_lcb = self.objective_on_confidence_set( + theta, objective, type=type + ) + + values[j] = sign * value + if arg: + args.append(theta_lcb) + + if args: + return values, args + else: + return values + + def ucb(self, xtest): + pass + + def std_opt(self, xtest): + N = xtest.size()[0] + for i in range(N): + x = xtest[i, :] + theta = cp.Variable(self.n * self.no_models) + M = torch.block_diag(self.Ks) + cp.norm(theta, p=2) * theta[i] + + def std_fixed(self, xtest): + K_star, K_star_star = self.execute(xtest) + self.B = torch.t(torch.linalg.solve(self.K, torch.t(K_star))) + first = torch.diag(K_star_star).view(-1, 1) + second = torch.einsum("ij,ji->i", (self.B, torch.t(K_star))).view(-1, 1) + variance = first - second + ystd = torch.sqrt(variance) + return ystd + + def sample(self, xtest, size=1): + pass + if __name__ == "__main__": - from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.helpers.helper import interval_torch - import matplotlib.pyplot as plt - n = 512 - N = 5 - s = 0.1 - d = 1 - - xtest = interval_torch(n,d) - x = interval_torch(N,d) - - kernel1 = KernelFunction(gamma = 0.05) - kernel2 = KernelFunction(kernel_name="polynomial", power = 5) - kernel3 = KernelFunction(kernel_name="polynomial", power=3) - kernel4 = KernelFunction(kernel_name="polynomial", power=2) - kernel5 = KernelFunction(kernel_name="polynomial", power=1) - kernel6 = KernelFunction(kernel_name="polynomial", power=1) - - kernels = [kernel1, kernel2,kernel3, kernel4, kernel5, kernel6] - - GP = GaussianProcess(kernel=kernel1) - torch.manual_seed(2) - y = GP.sample(x) - - # sup inverse barrier - for lam in [0.01,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.99,0.9999]: - regularizer = SupRegularizer(d = len(kernels), lam = lam, constrained=True, version='1') - mkl = MultipleKernelLearner(kernels, regularizer= regularizer) - mkl.fit_gp(x,y) - mkl.visualize(xtest, size = 0, show = False, fig = False, color = 'tab:blue', label = " sup:"+str(lam)) - regularizer = SupRegularizer(d=len(kernels), lam=lam, constrained=True, version='2') - mkl = MultipleKernelLearner(kernels, regularizer=regularizer) - 
mkl.fit_gp(x, y) - mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:green', label=" sup:" + str(lam)) - - # dirichlet mixture - regularizer = DirichletRegularizer(d=len(kernels), lam=lam, constrained=True) - mkl = MultipleKernelLearner(kernels, regularizer=regularizer) - mkl.fit_gp(x, y) - mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:red', label = " dirichlet") - - # no regularizer - mkl = MultipleKernelLearner(kernels, regularizer=None) - mkl.fit_gp(x, y) - mkl.visualize(xtest, size=0, show=False, fig=False, color='tab:orange', label = " no") - - plt.show() + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.helpers.helper import interval_torch + import matplotlib.pyplot as plt + + n = 512 + N = 5 + s = 0.1 + d = 1 + + xtest = interval_torch(n, d) + x = interval_torch(N, d) + + kernel1 = KernelFunction(gamma=0.05) + kernel2 = KernelFunction(kernel_name="polynomial", power=5) + kernel3 = KernelFunction(kernel_name="polynomial", power=3) + kernel4 = KernelFunction(kernel_name="polynomial", power=2) + kernel5 = KernelFunction(kernel_name="polynomial", power=1) + kernel6 = KernelFunction(kernel_name="polynomial", power=1) + + kernels = [kernel1, kernel2, kernel3, kernel4, kernel5, kernel6] + + GP = GaussianProcess(kernel=kernel1) + torch.manual_seed(2) + y = GP.sample(x) + + # sup inverse barrier + for lam in [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 0.9999]: + regularizer = SupRegularizer( + d=len(kernels), lam=lam, constrained=True, version="1" + ) + mkl = MultipleKernelLearner(kernels, regularizer=regularizer) + mkl.fit_gp(x, y) + mkl.visualize( + xtest, + size=0, + show=False, + fig=False, + color="tab:blue", + label=" sup:" + str(lam), + ) + regularizer = SupRegularizer( + d=len(kernels), lam=lam, constrained=True, version="2" + ) + mkl = MultipleKernelLearner(kernels, regularizer=regularizer) + mkl.fit_gp(x, y) + mkl.visualize( + xtest, + size=0, + show=False, + fig=False, + color="tab:green", + label=" sup:" + str(lam), + ) + + # dirichlet mixture + regularizer = DirichletRegularizer(d=len(kernels), lam=lam, constrained=True) + mkl = MultipleKernelLearner(kernels, regularizer=regularizer) + mkl.fit_gp(x, y) + mkl.visualize( + xtest, size=0, show=False, fig=False, color="tab:red", label=" dirichlet" + ) + + # no regularizer + mkl = MultipleKernelLearner(kernels, regularizer=None) + mkl.fit_gp(x, y) + mkl.visualize(xtest, size=0, show=False, fig=False, color="tab:orange", label=" no") + + plt.show() diff --git a/stpy/continuous_processes/mkl_features.py b/stpy/continuous_processes/mkl_features.py index 8b690c0..04ccda7 100755 --- a/stpy/continuous_processes/mkl_features.py +++ b/stpy/continuous_processes/mkl_features.py @@ -11,188 +11,196 @@ class MKL(Estimator): - def __init__(self, embeddings, init_weights=None, lam=0.0, s=0.1): - self.embeddings = embeddings - self.init_weights = init_weights - self.no_models = len(embeddings) - self.s = s - self.lam = lam - if self.init_weights is None: - self.init_weights = torch.ones(self.no_models) - self.weights = self.init_weights - if not isinstance(self.lam, list): - self.lam = [lam for i in range(self.no_models)] - - def get_emebed_dims(self): - self.dims = [] - for embedding in self.embeddings: - self.dims.append(embedding.get_basis_size()) - return self.dims - - def total_embed_dim(self): - sum = np.sum(self.get_emebed_dims()) - return sum - - def fit_gp(self, x, y): - - self.x = x - self.y = y - (self.n, self.d) = self.x.size() - self.total_m = 
self.total_embed_dim() - - self.Reggr = KernelizedFeatures(embeding=self, m=self.total_m, d=d, s=self.s) - self.Reggr.fit_gp(x, y) - - # def mean_vector(self): - # theta = torch.zeros(size = (self.total_embed_dim())) - # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() - # for index, emb in enumerate(self.embeddings): - # theta_small = emb.sample_theta() - # theta[dims_index[index]:dims_index[index + 1]] = theta_small.view(-1) - # return theta - - def mean_vector(self): - return self.Reggr.theta_mean() - - def mean_var(self, xtest): - # mu_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) - # var_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) - # - # for index, emb in enumerate(self.embeddings): - # mu,var = emb.mean_var(xtest) - # mu_avg = mu_avg + self.weights[index]*mu - # var_avg = var_avg + self.weights[index]*var - # return [mu_avg,var_avg] - - return self.Reggr.mean_std(xtest) - - def sample(self, xtest, size=1): - # sample_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) - # - # for index, emb in enumerate(self.embeddings): - # sample = emb.sample(xtest, size = size) - # sample_avg = sample_avg + self.weights[index]*sample - return self.Reggr.sample(xtest, size=size) - - def embed(self, xtest): - n = xtest.size()[0] - Phi = torch.zeros(size=(n, int(self.total_embed_dim())), dtype=torch.float64) - dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()), dim=0).int() - - for index, embedding in enumerate(self.embeddings): - Phi[:, dims_index[index]:dims_index[index + 1]] = embedding.embed_internal(xtest) - - return Phi - - def selector_matrix(self): - dims = [] - for embedding in self.embeddings: - dims.append(embedding.get_basis_size()) - total_dim = self.total_embed_dim() - selector = torch.zeros(size=(int(total_dim), self.no_models), dtype=torch.float64) - z = 0 - for i in range(len(self.embeddings)): - selector[z:z + dims[i], i] = 1.0 - z = z + dims[i] - return torch.t(selector) - - ### - def evaluate_design(self, C, Phi): - n = Phi.size()[0] - - A = torch.lstsq(torch.t(C), torch.t(Phi))[0] - B = torch.t(A[0:n, :]) - - delta = torch.norm(B @ Phi - C, p=2) # /torch.norm(B, p = 2) #relative error - - pinv = torch.pinverse(torch.t(Phi) @ Phi) - W = C @ pinv @ torch.t(C) - - rank = torch.matrix_rank(B) - lambda_max = torch.symeig(W)[0][-1] # largest eigenvalue - - upper_bound = lambda_max * (self.s * self.s * 2 + delta) - - return [upper_bound.detach(), rank] - - def acquisiton_function(self, C, Phi, candidates): - values = [] - ranks = [] - for candidate_point in candidates: - newPhi = torch.cat((Phi, candidate_point.view(1, -1))) - values.append(self.evaluate_design(C, newPhi)[0]) - ranks.append(self.evaluate_design(C, newPhi)[1]) - - return [torch.Tensor(values), torch.Tensor(ranks)] + def __init__(self, embeddings, init_weights=None, lam=0.0, s=0.1): + self.embeddings = embeddings + self.init_weights = init_weights + self.no_models = len(embeddings) + self.s = s + self.lam = lam + if self.init_weights is None: + self.init_weights = torch.ones(self.no_models) + self.weights = self.init_weights + if not isinstance(self.lam, list): + self.lam = [lam for i in range(self.no_models)] + + def get_emebed_dims(self): + self.dims = [] + for embedding in self.embeddings: + self.dims.append(embedding.get_basis_size()) + return self.dims + + def total_embed_dim(self): + sum = np.sum(self.get_emebed_dims()) + return sum + + def fit_gp(self, x, y): + + self.x = x + self.y = y + (self.n, 
self.d) = self.x.size() + self.total_m = self.total_embed_dim() + + self.Reggr = KernelizedFeatures(embeding=self, m=self.total_m, d=d, s=self.s) + self.Reggr.fit_gp(x, y) + + # def mean_vector(self): + # theta = torch.zeros(size = (self.total_embed_dim())) + # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() + # for index, emb in enumerate(self.embeddings): + # theta_small = emb.sample_theta() + # theta[dims_index[index]:dims_index[index + 1]] = theta_small.view(-1) + # return theta + + def mean_vector(self): + return self.Reggr.theta_mean() + + def mean_var(self, xtest): + # mu_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) + # var_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) + # + # for index, emb in enumerate(self.embeddings): + # mu,var = emb.mean_var(xtest) + # mu_avg = mu_avg + self.weights[index]*mu + # var_avg = var_avg + self.weights[index]*var + # return [mu_avg,var_avg] + + return self.Reggr.mean_std(xtest) + + def sample(self, xtest, size=1): + # sample_avg = torch.zeros(size = (xtest.size()[0],1),dtype = torch.float64) + # + # for index, emb in enumerate(self.embeddings): + # sample = emb.sample(xtest, size = size) + # sample_avg = sample_avg + self.weights[index]*sample + return self.Reggr.sample(xtest, size=size) + + def embed(self, xtest): + n = xtest.size()[0] + Phi = torch.zeros(size=(n, int(self.total_embed_dim())), dtype=torch.float64) + dims_index = torch.cumsum( + torch.Tensor([0] + self.get_emebed_dims()), dim=0 + ).int() + + for index, embedding in enumerate(self.embeddings): + Phi[:, dims_index[index] : dims_index[index + 1]] = ( + embedding.embed_internal(xtest) + ) + + return Phi + + def selector_matrix(self): + dims = [] + for embedding in self.embeddings: + dims.append(embedding.get_basis_size()) + total_dim = self.total_embed_dim() + selector = torch.zeros( + size=(int(total_dim), self.no_models), dtype=torch.float64 + ) + z = 0 + for i in range(len(self.embeddings)): + selector[z : z + dims[i], i] = 1.0 + z = z + dims[i] + return torch.t(selector) + + ### + def evaluate_design(self, C, Phi): + n = Phi.size()[0] + + A = torch.lstsq(torch.t(C), torch.t(Phi))[0] + B = torch.t(A[0:n, :]) + + delta = torch.norm(B @ Phi - C, p=2) # /torch.norm(B, p = 2) #relative error + + pinv = torch.pinverse(torch.t(Phi) @ Phi) + W = C @ pinv @ torch.t(C) + + rank = torch.matrix_rank(B) + lambda_max = torch.symeig(W)[0][-1] # largest eigenvalue + + upper_bound = lambda_max * (self.s * self.s * 2 + delta) + + return [upper_bound.detach(), rank] + + def acquisiton_function(self, C, Phi, candidates): + values = [] + ranks = [] + for candidate_point in candidates: + newPhi = torch.cat((Phi, candidate_point.view(1, -1))) + values.append(self.evaluate_design(C, newPhi)[0]) + ranks.append(self.evaluate_design(C, newPhi)[1]) + + return [torch.Tensor(values), torch.Tensor(ranks)] if __name__ == "__main__": - n = 16 - N = 4 - s = 0.00000001 - d = 1 - TestFunction = MultiRKHS() - xtest = TestFunction.interval(n) - x = TestFunction.initial_guess(N) - y = TestFunction.eval(x, sigma=s) - bounds = TestFunction.bounds() - - p = 2 - embedding2 = PolynomialEmbedding(d, p, groups=None) - GP1 = KernelizedFeatures(embeding=embedding2, m=embedding2.size, d=d, s=s, - groups=None, bounds=bounds) - - map = lambda x: torch.abs(x) - embedding3 = CustomEmbedding(d, map, 1, groups=None) - - GP2 = KernelizedFeatures(embeding=embedding3, m=embedding3.size, d=d, s=s, - groups=None, bounds=bounds) - - m = 2 - gamma = 0.2 - GP3 = 
GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None) - GP4 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None) - - MKL = MKL([GP1, GP2], s=s) - - C = MKL.selector_matrix() - Candidates = MKL.embed(xtest) - eps = 1 - N = 1 - x = TestFunction.initial_guess(N) - - plt.close('all') - - while eps > 10e-3: - # print (x,eps) - Phi = MKL.embed(x) - # print (C.size(), Phi.size()) - print(N, MKL.evaluate_design(C, Phi)) - eps = MKL.evaluate_design(C, Phi)[0] - # N = N + 1 - score, rank = MKL.acquisiton_function(C, Phi, Candidates) - score = score + 1. / (rank - 1) - index_min = torch.argmin(score) - x_min = xtest[index_min] - - plt.plot(xtest.numpy(), torch.log(score).numpy(), 'g') - plt.plot(xtest.numpy(), rank.numpy(), 'r--') - plt.plot(x, x * 0, 'ro') - plt.plot(xtest[index_min].numpy(), torch.log(score[index_min]).numpy(), 'go') - plt.show() - - x = torch.cat((x, x_min.view(1, -1))) - - y = TestFunction.eval(x, sigma=s) - print(x) - print(y) - - MKL.fit_gp(x, y) - print("Projection:") - print("--------------") - print(C @ MKL.mean_vector()) - print("--------------") - - MKL.visualize(xtest, f_true=TestFunction.eval_noiseless) - plt.show() + n = 16 + N = 4 + s = 0.00000001 + d = 1 + TestFunction = MultiRKHS() + xtest = TestFunction.interval(n) + x = TestFunction.initial_guess(N) + y = TestFunction.eval(x, sigma=s) + bounds = TestFunction.bounds() + + p = 2 + embedding2 = PolynomialEmbedding(d, p, groups=None) + GP1 = KernelizedFeatures( + embeding=embedding2, m=embedding2.size, d=d, s=s, groups=None, bounds=bounds + ) + + map = lambda x: torch.abs(x) + embedding3 = CustomEmbedding(d, map, 1, groups=None) + + GP2 = KernelizedFeatures( + embeding=embedding3, m=embedding3.size, d=d, s=s, groups=None, bounds=bounds + ) + + m = 2 + gamma = 0.2 + GP3 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None) + GP4 = GaussianProcessFF(d=d, s=s, m=m, gamma=gamma, bounds=bounds, groups=None) + + MKL = MKL([GP1, GP2], s=s) + + C = MKL.selector_matrix() + Candidates = MKL.embed(xtest) + eps = 1 + N = 1 + x = TestFunction.initial_guess(N) + + plt.close("all") + + while eps > 10e-3: + # print (x,eps) + Phi = MKL.embed(x) + # print (C.size(), Phi.size()) + print(N, MKL.evaluate_design(C, Phi)) + eps = MKL.evaluate_design(C, Phi)[0] + # N = N + 1 + score, rank = MKL.acquisiton_function(C, Phi, Candidates) + score = score + 1.0 / (rank - 1) + index_min = torch.argmin(score) + x_min = xtest[index_min] + + plt.plot(xtest.numpy(), torch.log(score).numpy(), "g") + plt.plot(xtest.numpy(), rank.numpy(), "r--") + plt.plot(x, x * 0, "ro") + plt.plot(xtest[index_min].numpy(), torch.log(score[index_min]).numpy(), "go") + plt.show() + + x = torch.cat((x, x_min.view(1, -1))) + + y = TestFunction.eval(x, sigma=s) + print(x) + print(y) + + MKL.fit_gp(x, y) + print("Projection:") + print("--------------") + print(C @ MKL.mean_vector()) + print("--------------") + + MKL.visualize(xtest, f_true=TestFunction.eval_noiseless) + plt.show() diff --git a/stpy/continuous_processes/nystrom_fea.py b/stpy/continuous_processes/nystrom_fea.py index a209c47..03c06e6 100755 --- a/stpy/continuous_processes/nystrom_fea.py +++ b/stpy/continuous_processes/nystrom_fea.py @@ -9,339 +9,397 @@ class NystromFeatures(Embedding): - ''' - Nystrom Features for Gaussian Kernel - ''' - - def __init__(self, kernel_object, m=100, approx="uniform", s=1., samples=100): - - self.fit = False - self.m = m - try: - self.ms = int(torch.sum(m)) - except: - self.ms = m - self.samples = samples - 
self.kernel_object = kernel_object - self.kernel = kernel_object.kernel - self.approx = approx - self.s = s - - def description(self): - """ - Description of GP in text - :return: string with description - """ - return "Nystrom\n" + "Appprox: " + self.approx - - def subsample(self, x, y): - if self.approx == "uniform": - C, w = self.uniform_subsampling(x, y) - elif self.approx == "leverage": - C, w = self.leverage_score_subsampling(x, y) - elif self.approx == "online_leverage": - C, w = self.sequential_leverage_score_subsampling(x, y) - return (C, w) - - def uniform_subsampling(self, x, y): - N = x.size()[0] - C = np.random.choice(N, int(self.ms)) - weights = torch.ones(self.ms) - return (C, weights) - - def leverage_score_subsampling(self, x, y): - N = x.size()[0] - from stpy.continuous_processes.gauss_procc import GaussianProcess - GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s) - GP.fit_gp(x, y) - mean, leverage_scores = GP.mean_std(x) - weights = torch.ones(self.ms) - - args = [0] - size = 1 - - for j in range(N): - point = x[j, :] - if size < self.ms: - leverage_score = float(leverage_scores[j, :]) - q = np.random.binomial(self.ms, leverage_score) - if q > 0: - args.append(j) - weights[size] = (q / float(self.ms)) / leverage_score - size = size + 1 - else: - pass - - print(args, weights) - return (args, weights) - - def sequential_leverage_score_subsampling(self, x, y): - N = x.size()[0] - d = x.size()[1] - from stpy.continuous_processes.gauss_procc import GaussianProcess - GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s) - - dts = torch.zeros(self.ms, d, dtype=torch.float64) - dts[0, :] = x[0, :] - args = [0] - size = 1 - weights = torch.ones(self.ms) - - for j in range(N): - point = x[j, :] - # print (size,x.size()) - if size < self.ms: - GP.fit_gp(dts[0:size, :], y[0:size, :]) - mean, leverage_score = GP.mean_std(point.view(1, d)) - q = np.random.binomial(self.ms, float(leverage_score)) - if q > 0: - args.append(j) - dts[size, :] = point - weights[size] = (q / float(self.ms)) / leverage_score - size = size + 1 - else: - pass - return (args, weights) - - def fit_gp(self, x, y, eps=1e-14): - ''' - Function to Fit GP - ''' - self.x = x - self.y = y - self.d = x.size()[1] - self.N = x.size()[0] - assert (self.ms <= self.N) - self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel - if self.approx == "svd": - self.xs = x - K = self.kernel(x, x) - if 3 * self.ms > self.N: - (D, V) = torch.linalg.eigh(K, UPLO='U') - V = torch.t(V)[self.N - self.ms:self.N, :].T - D = D[self.N - self.ms:self.N] - D[D <= eps] = 0 - - else: - (D, V) = torch.lobpcg(K, k=self.ms, niter=-1) - - # Dinv = torch.diag(1./D[self.N-self.ms:self.N]) - # Dinv[Dinv <=0 ] = 0 - # Dinv = torch.sqrt(Dinv) - self.eigs = D - Dinv = torch.diag(torch.sqrt(1. 
/ D)) - # self.M = (torch.t(V)[self.N-self.ms:self.N,:]).T @ Dinv.T - self.M = V @ Dinv - # self.embed = lambda q: torch.t(torch.mm(Dinv, torch.mm(torch.t(V)[self.N-self.ms:self.N,:], self.kernel(q, self.x) ))) - self.embed = lambda q: self.kernel(q, self.xs).T @ self.M - self.C = [] - elif self.approx == 'nothing': - self.xs = self.x[0:self.ms, :] - self.M = torch.eye(self.ms).double() - self.embed = lambda q: self.kernel(q, self.xs).T @ self.M - - elif self.approx == 'positive_svd': - from sklearn.decomposition import NMF - GP = GaussianProcess(kernel=self.kernel_object) - ysample = GP.sample(x, size=self.samples) ** 2 - X = ysample - model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12) - W = torch.from_numpy(model.fit_transform(X)) - H = torch.from_numpy(model.components_) - l = torch.norm(W, dim=1) - l = 1. / l - - if x.size()[1] == 1: - fs = [] - for j in range(self.ms): - fs.append(interp1d(x.view(-1).numpy(), (W.T @ torch.diag(l))[j, :].numpy())) - self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q)).view(-1, 1) for j in range(self.ms)], - dim=1) - - elif x.size()[1] == 2: - fs = [] - for j in range(self.ms): - W_j = (W.T @ torch.diag(l))[j, :].numpy() - fs.append(LinearNDInterpolator(x, W_j)) - self.embed = lambda q: torch.cat( - [torch.from_numpy(fs[j](q[:, 0], q[:, 1])).view(-1, 1) for j in range(self.ms)], dim=1) - # elif x.size()[1] == 2: - # fs = [] - # for j in range(self.ms): - # W_j = (W.T @ torch.diag(l))[j, :].numpy() - # fs.append(Rbf(x[:,0],x[:,1], W_j)) - # self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q[:,0],q[:,1])).view(-1, 1) for j in range(self.ms)], - # dim=1) - - self.C = [] - - elif self.approx == "cover": - K = self.kernel(x, x) # + self.s * self.s * torch.eye(self.N, dtype=torch.float64) - Khalf = torch.from_numpy(np.real(scipy.linalg.sqrtm(K.numpy()))) - Khalfinv = torch.pinverse(Khalf) - self.embed = lambda q: torch.t( - torch.mm(Khalfinv, self.kernel(q, self.x))) - else: - self.C, self.weights = self.subsample(x, y) - xs = x[self.C, :] - self.Dweights = torch.diag(self.weights).double() - K = torch.mm(torch.mm(self.Dweights, self.kernel(xs, xs)), - self.Dweights) # + self.s*self.s * torch.eye(self.ms, dtype=torch.float64) - #(D, V) = torch.symeig(K, eigenvectors=True) - (D, V) = torch.linalg.eigh(K) - Dinv = torch.diag(1. 
/ D) - Dinv[Dinv <= 0] = 0 - Dinv = torch.sqrt(Dinv) - # Dinv = torch.diag(torch.pow(D[:],-0.5)) - self.embed = lambda q: torch.t( - torch.mm(Dinv, torch.mm(torch.t(V), torch.mm(self.Dweights, self.kernel(q, xs))))) - # self.embed = lambda x: torch.t(torch.mm(torch.sqrt(Dinv),torch.mm(V, self.kernel(x, xs)))) - embeding = self.embed(x) - self.Z_ = embeding.T @ embeding + self.s * self.s * torch.eye(self.ms).double() - - # self.K = (self.Z_ + self.s * self.s * torch.eye(self.ms, dtype=torch.float64)) - self.K = self.Z_ - self.Q = torch.t(embeding) - - self.fit = True - return None - - def mean_std(self, xtest): - if self.fit == False: - raise AssertionError("First fit") - else: - embeding = self.embed(xtest) - Q = self.embed(self.x) - theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K) - ymean = torch.mm(embeding, theta_mean) - temp = torch.t(torch.solve(torch.t(embeding), self.K)[0]) - diagonal = self.s * self.s * torch.einsum('ij,ji->i', (temp, torch.t(embeding))).view(-1, 1) - yvar = torch.sqrt(diagonal) - - return (ymean, yvar) - - def outer_kernel(self): - embeding = self.embed(self.x) - # print (embeding.size()) - K = torch.mm(embeding, torch.t(embeding)) - # Z = self.linear_kernel(embeding, (embeding)) - K = (K + self.s * self.s * torch.eye(self.N, dtype=torch.float64)) - # K = self.kernel(self.x,self.x) + self.s*self.s*torch.eye(self.N, dtype=torch.float64) - # print ("kernel:",K) - # print ("approximate:",Z) - return K - - def sample_theta(self, size=1): - basis = int(int(torch.sum(self.m))) - zeros = torch.zeros(basis, size, dtype=torch.float64) - random_vector = torch.normal(mean=zeros, std=1.) - - if self.fit == True: - # random vector - Z = torch.pinverse(self.K, rcond=10e-6) - self.L = torch.cholesky(Z, upper=False) - theta_mean = torch.mm(Z, torch.mm(self.Q, self.y)) - theta = torch.mm(self.s * self.L, random_vector) - theta = theta + theta_mean - else: - theta_mean = 0 - Z = (1. 
+ self.s * self.s) * torch.eye(basis, dtype=torch.float64) - L = torch.cholesky(Z, upper=False) - theta = torch.mm(L, random_vector) + theta_mean - return theta - - def sample(self, xtest, size=1): - ''' - Sample functions from Gaussian Process - ''' - theta = self.sample_theta(size=size) - f = torch.mm(self.embed(xtest), theta) - return f - - def visualize(self, xtest, f_true=None, points=True, show=True): - [mu, std] = self.mean_std(xtest) - if self.d == 1: - - plt.figure(figsize=(15, 7)) - plt.clf() - plt.plot(self.x.numpy(), self.y.numpy(), 'r+', ms=10, marker="o") - plt.plot(self.x[self.C, :].numpy(), self.y[self.C, :].numpy(), 'g+', ms=10, marker="o") - # plt.plot(xtest.numpy(), self.sample(xtest, size=2).numpy(), 'k--', lw=2, label="sample") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, - color="#dddddd") - if f_true is not None: - plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2) - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - if show == True: - plt.show() - - elif self.d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15, 7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].numpy(), (grid_x, grid_y), method='linear') - if f_true is not None: - grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4) - if points == True: - ax.scatter(self.x[:, 0].numpy(), self.x[:, 1].numpy(), self.y[:, 0].numpy(), c='r', s=100, marker="o", - depthshade=False) - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4) - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.show() - - else: - print("Visualization not implemented") + """ + Nystrom Features for Gaussian Kernel + """ + + def __init__(self, kernel_object, m=100, approx="uniform", s=1.0, samples=100): + + self.fit = False + self.m = m + try: + self.ms = int(torch.sum(m)) + except: + self.ms = m + self.samples = samples + self.kernel_object = kernel_object + self.kernel = kernel_object.kernel + self.approx = approx + self.s = s + + def description(self): + """ + Description of GP in text + :return: string with description + """ + return "Nystrom\n" + "Appprox: " + self.approx + + def subsample(self, x, y): + if self.approx == "uniform": + C, w = self.uniform_subsampling(x, y) + elif self.approx == "leverage": + C, w = self.leverage_score_subsampling(x, y) + elif self.approx == "online_leverage": + C, w = self.sequential_leverage_score_subsampling(x, y) + return (C, w) + + def uniform_subsampling(self, x, y): + N = x.size()[0] + C = np.random.choice(N, int(self.ms)) + weights = torch.ones(self.ms) + return (C, weights) + + def leverage_score_subsampling(self, x, y): + N = x.size()[0] + from stpy.continuous_processes.gauss_procc import GaussianProcess + + GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s) + GP.fit_gp(x, y) + mean, leverage_scores = GP.mean_std(x) + weights = torch.ones(self.ms) + + args = [0] + size = 1 + + for j in range(N): + point = x[j, :] + if size < self.ms: + leverage_score = float(leverage_scores[j, :]) + q = np.random.binomial(self.ms, leverage_score) + if q > 0: + args.append(j) + weights[size] = (q / float(self.ms)) / leverage_score + size = 
size + 1 + else: + pass + + print(args, weights) + return (args, weights) + + def sequential_leverage_score_subsampling(self, x, y): + N = x.size()[0] + d = x.size()[1] + from stpy.continuous_processes.gauss_procc import GaussianProcess + + GP = GaussianProcess(kernel_custom=self.kernel_object, s=self.s) + + dts = torch.zeros(self.ms, d, dtype=torch.float64) + dts[0, :] = x[0, :] + args = [0] + size = 1 + weights = torch.ones(self.ms) + + for j in range(N): + point = x[j, :] + # print (size,x.size()) + if size < self.ms: + GP.fit_gp(dts[0:size, :], y[0:size, :]) + mean, leverage_score = GP.mean_std(point.view(1, d)) + q = np.random.binomial(self.ms, float(leverage_score)) + if q > 0: + args.append(j) + dts[size, :] = point + weights[size] = (q / float(self.ms)) / leverage_score + size = size + 1 + else: + pass + return (args, weights) + + def fit_gp(self, x, y, eps=1e-14): + """ + Function to Fit GP + """ + self.x = x + self.y = y + self.d = x.size()[1] + self.N = x.size()[0] + assert self.ms <= self.N + self.linear_kernel = KernelFunction(kernel_name="linear").linear_kernel + if self.approx == "svd": + self.xs = x + K = self.kernel(x, x) + if 3 * self.ms > self.N: + (D, V) = torch.linalg.eigh(K, UPLO="U") + V = torch.t(V)[self.N - self.ms : self.N, :].T + D = D[self.N - self.ms : self.N] + D[D <= eps] = 0 + + else: + (D, V) = torch.lobpcg(K, k=self.ms, niter=-1) + + # Dinv = torch.diag(1./D[self.N-self.ms:self.N]) + # Dinv[Dinv <=0 ] = 0 + # Dinv = torch.sqrt(Dinv) + self.eigs = D + Dinv = torch.diag(torch.sqrt(1.0 / D)) + # self.M = (torch.t(V)[self.N-self.ms:self.N,:]).T @ Dinv.T + self.M = V @ Dinv + # self.embed = lambda q: torch.t(torch.mm(Dinv, torch.mm(torch.t(V)[self.N-self.ms:self.N,:], self.kernel(q, self.x) ))) + self.embed = lambda q: self.kernel(q, self.xs).T @ self.M + self.C = [] + elif self.approx == "nothing": + self.xs = self.x[0 : self.ms, :] + self.M = torch.eye(self.ms).double() + self.embed = lambda q: self.kernel(q, self.xs).T @ self.M + + elif self.approx == "positive_svd": + from sklearn.decomposition import NMF + + GP = GaussianProcess(kernel=self.kernel_object) + ysample = GP.sample(x, size=self.samples) ** 2 + X = ysample + model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12) + W = torch.from_numpy(model.fit_transform(X)) + H = torch.from_numpy(model.components_) + l = torch.norm(W, dim=1) + l = 1.0 / l + + if x.size()[1] == 1: + fs = [] + for j in range(self.ms): + fs.append( + interp1d( + x.view(-1).numpy(), (W.T @ torch.diag(l))[j, :].numpy() + ) + ) + self.embed = lambda q: torch.cat( + [torch.from_numpy(fs[j](q)).view(-1, 1) for j in range(self.ms)], + dim=1, + ) + + elif x.size()[1] == 2: + fs = [] + for j in range(self.ms): + W_j = (W.T @ torch.diag(l))[j, :].numpy() + fs.append(LinearNDInterpolator(x, W_j)) + self.embed = lambda q: torch.cat( + [ + torch.from_numpy(fs[j](q[:, 0], q[:, 1])).view(-1, 1) + for j in range(self.ms) + ], + dim=1, + ) + # elif x.size()[1] == 2: + # fs = [] + # for j in range(self.ms): + # W_j = (W.T @ torch.diag(l))[j, :].numpy() + # fs.append(Rbf(x[:,0],x[:,1], W_j)) + # self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q[:,0],q[:,1])).view(-1, 1) for j in range(self.ms)], + # dim=1) + + self.C = [] + + elif self.approx == "cover": + K = self.kernel( + x, x + ) # + self.s * self.s * torch.eye(self.N, dtype=torch.float64) + Khalf = torch.from_numpy(np.real(scipy.linalg.sqrtm(K.numpy()))) + Khalfinv = torch.pinverse(Khalf) + self.embed = lambda q: torch.t(torch.mm(Khalfinv, self.kernel(q, self.x))) + else: + 
self.C, self.weights = self.subsample(x, y) + xs = x[self.C, :] + self.Dweights = torch.diag(self.weights).double() + K = torch.mm( + torch.mm(self.Dweights, self.kernel(xs, xs)), self.Dweights + ) # + self.s*self.s * torch.eye(self.ms, dtype=torch.float64) + # (D, V) = torch.symeig(K, eigenvectors=True) + (D, V) = torch.linalg.eigh(K) + Dinv = torch.diag(1.0 / D) + Dinv[Dinv <= 0] = 0 + Dinv = torch.sqrt(Dinv) + # Dinv = torch.diag(torch.pow(D[:],-0.5)) + self.embed = lambda q: torch.t( + torch.mm( + Dinv, + torch.mm(torch.t(V), torch.mm(self.Dweights, self.kernel(q, xs))), + ) + ) + # self.embed = lambda x: torch.t(torch.mm(torch.sqrt(Dinv),torch.mm(V, self.kernel(x, xs)))) + embeding = self.embed(x) + self.Z_ = embeding.T @ embeding + self.s * self.s * torch.eye(self.ms).double() + + # self.K = (self.Z_ + self.s * self.s * torch.eye(self.ms, dtype=torch.float64)) + self.K = self.Z_ + self.Q = torch.t(embeding) + + self.fit = True + return None + + def mean_std(self, xtest): + if self.fit == False: + raise AssertionError("First fit") + else: + embeding = self.embed(xtest) + Q = self.embed(self.x) + theta_mean, _ = torch.solve(torch.mm(torch.t(Q), self.y), self.K) + ymean = torch.mm(embeding, theta_mean) + temp = torch.t(torch.solve(torch.t(embeding), self.K)[0]) + diagonal = ( + self.s + * self.s + * torch.einsum("ij,ji->i", (temp, torch.t(embeding))).view(-1, 1) + ) + yvar = torch.sqrt(diagonal) + + return (ymean, yvar) + + def outer_kernel(self): + embeding = self.embed(self.x) + # print (embeding.size()) + K = torch.mm(embeding, torch.t(embeding)) + # Z = self.linear_kernel(embeding, (embeding)) + K = K + self.s * self.s * torch.eye(self.N, dtype=torch.float64) + # K = self.kernel(self.x,self.x) + self.s*self.s*torch.eye(self.N, dtype=torch.float64) + # print ("kernel:",K) + # print ("approximate:",Z) + return K + + def sample_theta(self, size=1): + basis = int(int(torch.sum(self.m))) + zeros = torch.zeros(basis, size, dtype=torch.float64) + random_vector = torch.normal(mean=zeros, std=1.0) + + if self.fit == True: + # random vector + Z = torch.pinverse(self.K, rcond=10e-6) + self.L = torch.cholesky(Z, upper=False) + theta_mean = torch.mm(Z, torch.mm(self.Q, self.y)) + theta = torch.mm(self.s * self.L, random_vector) + theta = theta + theta_mean + else: + theta_mean = 0 + Z = (1.0 + self.s * self.s) * torch.eye(basis, dtype=torch.float64) + L = torch.cholesky(Z, upper=False) + theta = torch.mm(L, random_vector) + theta_mean + return theta + + def sample(self, xtest, size=1): + """ + Sample functions from Gaussian Process + """ + theta = self.sample_theta(size=size) + f = torch.mm(self.embed(xtest), theta) + return f + + def visualize(self, xtest, f_true=None, points=True, show=True): + [mu, std] = self.mean_std(xtest) + if self.d == 1: + + plt.figure(figsize=(15, 7)) + plt.clf() + plt.plot(self.x.numpy(), self.y.numpy(), "r+", ms=10, marker="o") + plt.plot( + self.x[self.C, :].numpy(), + self.y[self.C, :].numpy(), + "g+", + ms=10, + marker="o", + ) + # plt.plot(xtest.numpy(), self.sample(xtest, size=2).numpy(), 'k--', lw=2, label="sample") + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std).numpy().flat, + (mu + 2 * std).numpy().flat, + color="#dddddd", + ) + if f_true is not None: + plt.plot(xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2) + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.legend() + if show == True: + plt.show() + + elif self.d == 2: + from scipy.interpolate import 
griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].numpy(), (grid_x, grid_y), method="linear" + ) + if f_true is not None: + grid_z = griddata( + (xx, yy), + f_true(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z, color="b", alpha=0.4) + if points == True: + ax.scatter( + self.x[:, 0].numpy(), + self.x[:, 1].numpy(), + self.y[:, 0].numpy(), + c="r", + s=100, + marker="o", + depthshade=False, + ) + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4) + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.show() + + else: + print("Visualization not implemented") if __name__ == "__main__": - # domain size - L_infinity_ball = 1 - # dimension - d = 1 - # error variance - s = 0.1 - # grid density - n = 1024 - # number of intial points - N = 100 - # smoothness - gamma = torch.from_numpy(np.array([0.4, 0.4])) - # test problem - - xtest = torch.from_numpy(interval(n, d)) - x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) - - f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) - # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1) - - f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1., - out=None) * s - # targets - y = f(x) - - # GP model with squared exponential - - kernel = KernelFunction(gamma=0.05) - GP0 = GaussianProcess(kernel_custom=kernel, s=s) - GP0.fit_gp(x, y) - GP0.visualize(xtest, f_true=f_no_noise) - - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="uniform") - GP.fit_gp(x, y) - GP.visualize(xtest, f_true=f_no_noise) - - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="online_leverage") - GP.fit_gp(x, y) - GP.visualize(xtest, f_true=f_no_noise) - - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="svd") - GP.fit_gp(x, y) - print(GP0.K, GP.outer_kernel()) - GP.visualize(xtest, f_true=f_no_noise) + # domain size + L_infinity_ball = 1 + # dimension + d = 1 + # error variance + s = 0.1 + # grid density + n = 1024 + # number of intial points + N = 100 + # smoothness + gamma = torch.from_numpy(np.array([0.4, 0.4])) + # test problem + + xtest = torch.from_numpy(interval(n, d)) + x = torch.from_numpy( + np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)) + ) + + f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) + # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1) + + f = ( + lambda q: f_no_noise(q) + + torch.normal( + mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None + ) + * s + ) + # targets + y = f(x) + + # GP model with squared exponential + + kernel = KernelFunction(gamma=0.05) + GP0 = GaussianProcess(kernel_custom=kernel, s=s) + GP0.fit_gp(x, y) + GP0.visualize(xtest, f_true=f_no_noise) + + GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="uniform") + GP.fit_gp(x, y) + GP.visualize(xtest, f_true=f_no_noise) + + GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="online_leverage") + GP.fit_gp(x, y) + GP.visualize(xtest, f_true=f_no_noise) + + GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="svd") + GP.fit_gp(x, y) + print(GP0.K, GP.outer_kernel()) + GP.visualize(xtest, f_true=f_no_noise) diff --git 
a/stpy/continuous_processes/primal_mkl.py b/stpy/continuous_processes/primal_mkl.py index 4944f9b..7ee2f66 100755 --- a/stpy/continuous_processes/primal_mkl.py +++ b/stpy/continuous_processes/primal_mkl.py @@ -3,200 +3,215 @@ import numpy as np import matplotlib.pyplot as plt -class PrimalMKL(RandomProcess): - - def __init__(self,embeddings,init_weights = None, lam = 0.0, s = 0): - self.embeddings = embeddings - self.init_weights = init_weights - self.no_models = len(embeddings) - self.s = s - self.lam = lam - if not isinstance(self.lam,list): - self.lam = [lam for i in range(self.no_models)] - - def total_embed_dim(self): - self.dims = [] - for embedding in self.embeddings: - self.dims.append(embedding.get_basis_size()) - sum = torch.sum(torch.Tensor(self.dims)) - return sum - - def get_emebed_dims(self): - self.total_embed_dim() - return self.dims - - # def fit_gp(self, x, y): - # """ - # In this function we are fitting - # In this function we are fitting - # - # - # - # :param x: - # :param y: - # :return: - # """ - # - # - # self.x = x - # self.y = y - # (self.n, self.d) = self.x.size() - # self.total_m = self.total_embed_dim() - # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() - # self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()] - # - # self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True) - # - # - - - def fit_gp(self,x,y): - self.x = x - self.y = y - (self.n,self.d) = self.x.size() - self.total_m = self.total_embed_dim() - dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() - - self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()] - - self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True) - - # def cost(theta,w): - # - # Phi = torch.zeros(size = (self.n,int(self.total_m)), dtype = torch.float64) - # reg = 0.0 - # for index,embedding in enumerate(self.embeddings): - # Phi[:,dims_index[index]:dims_index[index+1]] = embedding.embed(self.x)*torch.sqrt(theta[index]) - # reg = reg + torch.sqrt(torch.sum((torch.sqrt(theta[index])*w[index])**2)) - # wvector = torch.cat(w, 0) - # cost = torch.sum((torch.mm(Phi,wvector) - self.y)**2) - # cost = cost + self.lam*reg - # return cost - - def regularizers(w): - reg = torch.zeros(self.no_models,dtype=torch.float64) - for index, embedding in enumerate(self.embeddings): - reg[index] = torch.sqrt(torch.sum(w[index] ** 2)) - return reg - - def cost(w): - Phi = torch.zeros(size = (self.n,int(self.total_m)), dtype = torch.float64) - reg = 0.0 - for index,embedding in enumerate(self.embeddings): - Phi[:,dims_index[index]:dims_index[index+1]] = embedding.embed_internal(self.x) - reg = reg + self.lam[index]*torch.sqrt(torch.sum(w[index])**2) - wvector = torch.cat(w, 0) - cost = torch.sum((torch.mm(Phi,wvector) - self.y)**2) - cost = cost + reg**2 + self.s*torch.norm(wvector)**2 - return cost - - - - ## optimizer objective - loss = torch.zeros(1,1,requires_grad = True,dtype = torch.float64) - loss = loss + cost(self.w) - - - - - #loss.requires_grad_(True) - - - from pymanopt.manifolds import Euclidean, Product - from pymanopt import Problem - from pymanopt.solvers import ConjugateGradient - from stpy.cost_functions import CostFunction - - # define cost function - C = CostFunction(cost, number_args=self.no_models) - [cost_numpy, egrad_numpy, ehess_numpy] = C.define() - x = [np.ones(shape = 
(i,1)) for i in self.get_emebed_dims()] - - - - # Optimization with Conjugate Gradient Descent - #print (cost_numpy(x)) - manifold = Product( [Euclidean(i) for i in self.get_emebed_dims()]) - problem = Problem(manifold=manifold, cost=cost_numpy, egrad=egrad_numpy, ehess=ehess_numpy, verbosity=10) - #solver = SteepestDescent(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-10) - solver = ConjugateGradient(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-20) - Xopt = solver.solve(problem, x=x) +class PrimalMKL(RandomProcess): - - - - self.w = [torch.from_numpy(w) for w in Xopt] - self.theta = torch.sum(regularizers(self.w),dim = 0)/regularizers(self.w) + self.s - self.theta = 1./self.theta - - print (self.theta) - - - def mean_var(self,xtest): - n = xtest.size()[0] - dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() - Phi = torch.zeros(size=(n, int(self.total_m)), dtype=torch.float64) - - for index, embedding in enumerate(self.embeddings): - Phi[:, dims_index[index]:dims_index[index + 1]] = embedding.embed_internal(xtest) - - wvector = torch.cat(self.w, 0) - mu = torch.mm(Phi, wvector) - - K = (torch.mm(torch.t(Phi),Phi) + self.s * torch.eye(int(self.total_m), dtype=torch.float64)) - temp = torch.t(torch.solve(torch.t(Phi),K)[0]) - var = torch.sqrt(self.s*self.s*torch.einsum('ij,ji->i', (temp, torch.t(Phi) )).view(-1, 1)) - - mu = mu.detach() - var = var.detach() - - return (mu,var) - - def sample(self,xtest, size =1): - mu, var = self.mean_var(xtest) - sample = mu + var - return sample - - def visualize(self,xtest,f_true = None, points = True, show = True): - super().visualize(xtest,f_true = f_true, points = points, show = False) - ## histogram of weights - plt.figure(2) - plt.bar(np.arange(len(self.embeddings)), self.theta.detach().numpy().flatten(), np.ones(len(self.embeddings)) * 0.5) - plt.show() + def __init__(self, embeddings, init_weights=None, lam=0.0, s=0): + self.embeddings = embeddings + self.init_weights = init_weights + self.no_models = len(embeddings) + self.s = s + self.lam = lam + if not isinstance(self.lam, list): + self.lam = [lam for i in range(self.no_models)] + + def total_embed_dim(self): + self.dims = [] + for embedding in self.embeddings: + self.dims.append(embedding.get_basis_size()) + sum = torch.sum(torch.Tensor(self.dims)) + return sum + + def get_emebed_dims(self): + self.total_embed_dim() + return self.dims + + # def fit_gp(self, x, y): + # """ + # In this function we are fitting + # In this function we are fitting + # + # + # + # :param x: + # :param y: + # :return: + # """ + # + # + # self.x = x + # self.y = y + # (self.n, self.d) = self.x.size() + # self.total_m = self.total_embed_dim() + # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() + # self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()] + # + # self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True) + # + # + + def fit_gp(self, x, y): + self.x = x + self.y = y + (self.n, self.d) = self.x.size() + self.total_m = self.total_embed_dim() + dims_index = torch.cumsum( + torch.Tensor([0] + self.get_emebed_dims()), dim=0 + ).int() + + self.w = [ + torch.ones(size=(i, 1), dtype=torch.float64, requires_grad=True) + for i in self.get_emebed_dims() + ] + + self.theta = torch.ones( + size=(self.no_models, 1), dtype=torch.float64, requires_grad=True + ) + + # def cost(theta,w): + # + # Phi = torch.zeros(size = (self.n,int(self.total_m)), dtype = 
torch.float64) + # reg = 0.0 + # for index,embedding in enumerate(self.embeddings): + # Phi[:,dims_index[index]:dims_index[index+1]] = embedding.embed(self.x)*torch.sqrt(theta[index]) + # reg = reg + torch.sqrt(torch.sum((torch.sqrt(theta[index])*w[index])**2)) + # wvector = torch.cat(w, 0) + # cost = torch.sum((torch.mm(Phi,wvector) - self.y)**2) + # cost = cost + self.lam*reg + # return cost + + def regularizers(w): + reg = torch.zeros(self.no_models, dtype=torch.float64) + for index, embedding in enumerate(self.embeddings): + reg[index] = torch.sqrt(torch.sum(w[index] ** 2)) + return reg + + def cost(w): + Phi = torch.zeros(size=(self.n, int(self.total_m)), dtype=torch.float64) + reg = 0.0 + for index, embedding in enumerate(self.embeddings): + Phi[:, dims_index[index] : dims_index[index + 1]] = ( + embedding.embed_internal(self.x) + ) + reg = reg + self.lam[index] * torch.sqrt(torch.sum(w[index]) ** 2) + wvector = torch.cat(w, 0) + cost = torch.sum((torch.mm(Phi, wvector) - self.y) ** 2) + cost = cost + reg**2 + self.s * torch.norm(wvector) ** 2 + return cost + + ## optimizer objective + loss = torch.zeros(1, 1, requires_grad=True, dtype=torch.float64) + loss = loss + cost(self.w) + + # loss.requires_grad_(True) + + from pymanopt.manifolds import Euclidean, Product + from pymanopt import Problem + from pymanopt.solvers import ConjugateGradient + from stpy.cost_functions import CostFunction + + # define cost function + C = CostFunction(cost, number_args=self.no_models) + [cost_numpy, egrad_numpy, ehess_numpy] = C.define() + x = [np.ones(shape=(i, 1)) for i in self.get_emebed_dims()] + + # Optimization with Conjugate Gradient Descent + # print (cost_numpy(x)) + manifold = Product([Euclidean(i) for i in self.get_emebed_dims()]) + problem = Problem( + manifold=manifold, + cost=cost_numpy, + egrad=egrad_numpy, + ehess=ehess_numpy, + verbosity=10, + ) + # solver = SteepestDescent(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-10) + solver = ConjugateGradient(maxiter=1000, mingradnorm=1e-8, minstepsize=1e-20) + Xopt = solver.solve(problem, x=x) + + self.w = [torch.from_numpy(w) for w in Xopt] + self.theta = ( + torch.sum(regularizers(self.w), dim=0) / regularizers(self.w) + self.s + ) + self.theta = 1.0 / self.theta + + print(self.theta) + + def mean_var(self, xtest): + n = xtest.size()[0] + dims_index = torch.cumsum( + torch.Tensor([0] + self.get_emebed_dims()), dim=0 + ).int() + Phi = torch.zeros(size=(n, int(self.total_m)), dtype=torch.float64) + + for index, embedding in enumerate(self.embeddings): + Phi[:, dims_index[index] : dims_index[index + 1]] = ( + embedding.embed_internal(xtest) + ) + + wvector = torch.cat(self.w, 0) + mu = torch.mm(Phi, wvector) + + K = torch.mm(torch.t(Phi), Phi) + self.s * torch.eye( + int(self.total_m), dtype=torch.float64 + ) + temp = torch.t(torch.solve(torch.t(Phi), K)[0]) + var = torch.sqrt( + self.s * self.s * torch.einsum("ij,ji->i", (temp, torch.t(Phi))).view(-1, 1) + ) + + mu = mu.detach() + var = var.detach() + + return (mu, var) + + def sample(self, xtest, size=1): + mu, var = self.mean_var(xtest) + sample = mu + var + return sample + + def visualize(self, xtest, f_true=None, points=True, show=True): + super().visualize(xtest, f_true=f_true, points=points, show=False) + ## histogram of weights + plt.figure(2) + plt.bar( + np.arange(len(self.embeddings)), + self.theta.detach().numpy().flatten(), + np.ones(len(self.embeddings)) * 0.5, + ) + plt.show() if __name__ == "__main__": - from stpy.continuous_processes.fourier_fea import GaussianProcessFF - 
from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.test_functions.benchmarks import MultiRKHS - - n = 1024 - N = 100 - s = 0.01 - TestFunction = MultiRKHS() - xtest = TestFunction.interval(n) - x = TestFunction.initial_guess(N) - y = TestFunction.eval(x,sigma = s) - #TestFunction.visualize(xtest) - - - GP1 = GaussianProcess(s=0, kernel="linear") - GP2 = GaussianProcessFF(s=s, m=100, approx="hermite") - - MKL = PrimalMKL([GP1,GP2], lam=[0.1, 0.1], s = s) - MKL.fit_gp(x, y) - - print ("Importance Weights:",MKL.theta) - - print("Slope of linear line:", MKL.w[0]) - - MKL.visualize(xtest, f_true=TestFunction.eval_noiseless) - - # MKL = PrimalMKL(GPs, lam=0.01) - # MKL.fit_gp(x,y) - # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless) - # - # MKL = PrimalMKL(GPs, lam=0.0001) - # MKL.fit_gp(x,y) - # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless) + from stpy.continuous_processes.fourier_fea import GaussianProcessFF + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.test_functions.benchmarks import MultiRKHS + + n = 1024 + N = 100 + s = 0.01 + TestFunction = MultiRKHS() + xtest = TestFunction.interval(n) + x = TestFunction.initial_guess(N) + y = TestFunction.eval(x, sigma=s) + # TestFunction.visualize(xtest) + + GP1 = GaussianProcess(s=0, kernel="linear") + GP2 = GaussianProcessFF(s=s, m=100, approx="hermite") + + MKL = PrimalMKL([GP1, GP2], lam=[0.1, 0.1], s=s) + MKL.fit_gp(x, y) + + print("Importance Weights:", MKL.theta) + + print("Slope of linear line:", MKL.w[0]) + + MKL.visualize(xtest, f_true=TestFunction.eval_noiseless) + + # MKL = PrimalMKL(GPs, lam=0.01) + # MKL.fit_gp(x,y) + # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless) + # + # MKL = PrimalMKL(GPs, lam=0.0001) + # MKL.fit_gp(x,y) + # MKL.visualize(xtest,f_true=TestFunction.eval_noiseless) diff --git a/stpy/continuous_processes/trace_features.py b/stpy/continuous_processes/trace_features.py index 6f8a8ab..a8f5f26 100644 --- a/stpy/continuous_processes/trace_features.py +++ b/stpy/continuous_processes/trace_features.py @@ -7,117 +7,125 @@ class TraceFeatures(KernelizedFeatures): - def __init__(self, *args, PSD=False, **kwargs): - super().__init__(*args, **kwargs) - self.m = int(self.m) - self.PSD = PSD - - def construct_covariance(self): - emb = self.emb - X = torch.flatten(torch.einsum('ij,ik->jki', emb, emb).permute(1, 0, 2), end_dim=1) - V = torch.einsum('ik,jk->ij', X, X) - # Z = torch.einsum('ij,j->i',X,y.reshape(-1)).reshape(-1,1) - self.V = V + self.lam * self.s ** 2 * torch.eye(self.m ** 2).double() - - # self.A_new,_ = torch.solve(Z,self.V) - # self.A_new = self.A_new.reshape(self.m,self.m) - - def fit_gp(self, x, y): - self.n, self.d = x.size() - self.x = x - self.y = y - - self.emb = self.embed(x) - self.construct_covariance() - - emb = self.emb.numpy() - A = cp.Variable((self.m, self.m), symmetric=True) - cost = cp.sum_squares \ - (cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro") - - if self.PSD == True: - constraints = [A >> 0] - else: - constraints = [] - - prob = cp.Problem(cp.Minimize(cost), constraints) - prob.solve(solver=cp.MOSEK, verbose=True) - self.A = torch.from_numpy(A.value) - self.fit = True - - def mean_std(self, xtest, std=True): - emb = self.embed(xtest) - mu = torch.einsum('ij,jk,ik->i', emb, self.A, emb).view(-1, 1) - if std == True: - # invV = torch.inverse(self.V) - X = torch.flatten(torch.einsum('ij,ik->jki', emb, emb), end_dim=1) - Z, _ = torch.solve(X, self.V) - # diagonal = 
self.lam*self.s ** 2 * torch.einsum('ji,jk,ki->i', (X, invV, X)).view(-1, 1) - diagonal = self.lam * self.s ** 2 * torch.einsum('ij,ij->j', X, Z).view(-1, 1) - return mu, torch.sqrt(diagonal).view(-1, 1) - else: - return mu - - def band(self, xtest, sqrtbeta=2., maximization=True): - emb = self.embed(xtest) - X = torch.einsum('ij,ik->ijk', emb, emb) - n = emb.size()[0] - ucb = torch.zeros(size=(n, 1)).double() - - for i in range(n): - A = cp.Variable((self.m, self.m), symmetric=True) - cost = cp.trace(A @ X[i, :, :]) - - Z = torch.cholesky(self.V, upper=True) - zero = np.zeros(self.m ** 2) - constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * sqrtbeta, Z @ (cp.vec(A) - cp.vec(self.A.numpy())))] - - if self.PSD == True: - constraints += [A >> 0] - - if maximization == True: - prob = cp.Problem(cp.Maximize(cost), constraints) - else: - prob = cp.Problem(cp.Minimize(cost), constraints) - - prob.solve(solver=cp.MOSEK, verbose=False) - ucb[i] = torch.trace(torch.from_numpy(A.value) @ X[i, :, :]) - return ucb - - def lcb(self, xtest, sqrtbeta=2.): - return self.band(xtest, sqrtbeta=sqrtbeta, maximization=False) - - def ucb(self, xtest, sqrtbeta=2.): - return self.band(xtest, sqrtbeta=sqrtbeta, maximization=True) + def __init__(self, *args, PSD=False, **kwargs): + super().__init__(*args, **kwargs) + self.m = int(self.m) + self.PSD = PSD + + def construct_covariance(self): + emb = self.emb + X = torch.flatten( + torch.einsum("ij,ik->jki", emb, emb).permute(1, 0, 2), end_dim=1 + ) + V = torch.einsum("ik,jk->ij", X, X) + # Z = torch.einsum('ij,j->i',X,y.reshape(-1)).reshape(-1,1) + self.V = V + self.lam * self.s**2 * torch.eye(self.m**2).double() + + # self.A_new,_ = torch.solve(Z,self.V) + # self.A_new = self.A_new.reshape(self.m,self.m) + + def fit_gp(self, x, y): + self.n, self.d = x.size() + self.x = x + self.y = y + + self.emb = self.embed(x) + self.construct_covariance() + + emb = self.emb.numpy() + A = cp.Variable((self.m, self.m), symmetric=True) + cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / ( + self.s**2 + ) + (self.lam) * cp.norm(A, "fro") + + if self.PSD == True: + constraints = [A >> 0] + else: + constraints = [] + + prob = cp.Problem(cp.Minimize(cost), constraints) + prob.solve(solver=cp.MOSEK, verbose=True) + self.A = torch.from_numpy(A.value) + self.fit = True + + def mean_std(self, xtest, std=True): + emb = self.embed(xtest) + mu = torch.einsum("ij,jk,ik->i", emb, self.A, emb).view(-1, 1) + if std == True: + # invV = torch.inverse(self.V) + X = torch.flatten(torch.einsum("ij,ik->jki", emb, emb), end_dim=1) + Z, _ = torch.solve(X, self.V) + # diagonal = self.lam*self.s ** 2 * torch.einsum('ji,jk,ki->i', (X, invV, X)).view(-1, 1) + diagonal = self.lam * self.s**2 * torch.einsum("ij,ij->j", X, Z).view(-1, 1) + return mu, torch.sqrt(diagonal).view(-1, 1) + else: + return mu + + def band(self, xtest, sqrtbeta=2.0, maximization=True): + emb = self.embed(xtest) + X = torch.einsum("ij,ik->ijk", emb, emb) + n = emb.size()[0] + ucb = torch.zeros(size=(n, 1)).double() + + for i in range(n): + A = cp.Variable((self.m, self.m), symmetric=True) + cost = cp.trace(A @ X[i, :, :]) + + Z = torch.cholesky(self.V, upper=True) + zero = np.zeros(self.m**2) + constraints = [ + cp.SOC( + zero.T @ cp.vec(A) + self.s * sqrtbeta, + Z @ (cp.vec(A) - cp.vec(self.A.numpy())), + ) + ] + + if self.PSD == True: + constraints += [A >> 0] + + if maximization == True: + prob = cp.Problem(cp.Maximize(cost), constraints) + else: + prob = cp.Problem(cp.Minimize(cost), constraints) + + 
prob.solve(solver=cp.MOSEK, verbose=False) + ucb[i] = torch.trace(torch.from_numpy(A.value) @ X[i, :, :]) + return ucb + + def lcb(self, xtest, sqrtbeta=2.0): + return self.band(xtest, sqrtbeta=sqrtbeta, maximization=False) + + def ucb(self, xtest, sqrtbeta=2.0): + return self.band(xtest, sqrtbeta=sqrtbeta, maximization=True) if __name__ == "__main__": - from stpy.embeddings.embedding import HermiteEmbedding - import matplotlib.pyplot as plt + from stpy.embeddings.embedding import HermiteEmbedding + import matplotlib.pyplot as plt - m = 32 - n = 16 - s = 0.01 - N = 5 + m = 32 + n = 16 + s = 0.01 + N = 5 - func = lambda x: torch.sin(x * np.pi) ** 2 + 0.5 - x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, 1))) - y = func(x) + func = lambda x: torch.sin(x * np.pi) ** 2 + 0.5 + x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, 1))) + y = func(x) - embedding = HermiteEmbedding(m=m, gamma=0.5) - xtest = torch.from_numpy(np.linspace(-1, 1, n)).view(-1, 1) + embedding = HermiteEmbedding(m=m, gamma=0.5) + xtest = torch.from_numpy(np.linspace(-1, 1, n)).view(-1, 1) - F = TraceFeatures(s=s, embedding=embedding, m=m, PSD=True) - F.fit_gp(x, y) + F = TraceFeatures(s=s, embedding=embedding, m=m, PSD=True) + F.fit_gp(x, y) - F.visualize(xtest, f_true=func, size=0, show=False) + F.visualize(xtest, f_true=func, size=0, show=False) - lcb = F.lcb(xtest) - ucb = F.ucb(xtest) - plt.plot(xtest, lcb, '-s', color='lightblue', label='lcb') - plt.plot(xtest, ucb, '-s', color='gray', label='ucb') - plt.legend() - plt.show() + lcb = F.lcb(xtest) + ucb = F.ucb(xtest) + plt.plot(xtest, lcb, "-s", color="lightblue", label="lcb") + plt.plot(xtest, ucb, "-s", color="gray", label="ucb") + plt.legend() + plt.show() # # mu, std = F.mean_std(xtest) # plt.plot(xtest,func(xtest),'r',label = 'true') diff --git a/stpy/continuous_processes/truncated_kernelized_features.py b/stpy/continuous_processes/truncated_kernelized_features.py index 4fd76c3..b6f84b1 100644 --- a/stpy/continuous_processes/truncated_kernelized_features.py +++ b/stpy/continuous_processes/truncated_kernelized_features.py @@ -1,61 +1,96 @@ from stpy.continuous_processes.kernelized_features import KernelizedFeatures -import torch +import torch + class TruncatedKernelizedFeatures(KernelizedFeatures): - def __init__(self, embedding, m, s=0.001, lam=1, d=1, diameter=1, verbose=True, groups=None, bounds=None, scale=1, kappa=1, poly=2, - primal=True, beta_fun=None, alpha_score=lambda t: t**(1/4), default_alpha_score=1., bound = 1.): - super().__init__(embedding, m, s =s, lam=lam,d= d,diameter= diameter, verbose=verbose, - groups = groups, bounds=bounds, scale=scale, kappa=kappa, poly=poly, primal=primal, beta_fun = beta_fun, bound = bound) - primal = True - self.bound = bound - self.alpha_score = alpha_score - self.default_alpha_score = default_alpha_score - - def theta_mean(self, var=False, prior=False): - self.precompute() - - if self.fitted == True and prior == False: - theta_mean = self.invV@self.Q.T@self.y_truncated - Z = self.s**2 * self.invV - else: - theta_mean = 0*torch.ones(size=(self.m, 1)).double() - - if var is False: - return theta_mean - else: - return (theta_mean, Z) - - def fit(self, x=None, y=None): - self.alphas = self.y*0 + self.default_alpha_score - super().fit(x= x, y= y) - - def add_points(self,d): - x, y = d - if self.x is not None: - self.x = torch.cat((self.x, x), dim=0) - self.y = torch.cat((self.y, y), dim=0) - new_alpha =torch.Tensor( [self.alpha_score(self.x.size()[0])]).view(1,1) - self.alphas = 
torch.cat((self.alphas,new_alpha),dim=0) - else: - self.x = x - self.y = y - self.alphas = self.default_alpha_score - self.fitted = False - - def add_data_point(self,x,y): - self.add_points(x,y) - - def precompute(self): - if self.fitted == False: - self.Q = self.embed(self.x) - I = torch.eye(int(self.m)).double() - Z_ = self.Q.T@self.Q - self.V = Z_ + (self.s **2) * self.lam *I - self.invV = torch.pinverse(self.V) - self.y_truncated = self.y.view(-1)*(torch.abs(self.y) < self.alphas).view(-1).double() - self.y_truncated = self.y_truncated.view(-1,1) - self.fitted = True - else: - pass + def __init__( + self, + embedding, + m, + s=0.001, + lam=1, + d=1, + diameter=1, + verbose=True, + groups=None, + bounds=None, + scale=1, + kappa=1, + poly=2, + primal=True, + beta_fun=None, + alpha_score=lambda t: t ** (1 / 4), + default_alpha_score=1.0, + bound=1.0, + ): + super().__init__( + embedding, + m, + s=s, + lam=lam, + d=d, + diameter=diameter, + verbose=verbose, + groups=groups, + bounds=bounds, + scale=scale, + kappa=kappa, + poly=poly, + primal=primal, + beta_fun=beta_fun, + bound=bound, + ) + primal = True + self.bound = bound + self.alpha_score = alpha_score + self.default_alpha_score = default_alpha_score + + def theta_mean(self, var=False, prior=False): + self.precompute() + + if self.fitted == True and prior == False: + theta_mean = self.invV @ self.Q.T @ self.y_truncated + Z = self.s**2 * self.invV + else: + theta_mean = 0 * torch.ones(size=(self.m, 1)).double() + + if var is False: + return theta_mean + else: + return (theta_mean, Z) + + def fit(self, x=None, y=None): + self.alphas = self.y * 0 + self.default_alpha_score + super().fit(x=x, y=y) + + def add_points(self, d): + x, y = d + if self.x is not None: + self.x = torch.cat((self.x, x), dim=0) + self.y = torch.cat((self.y, y), dim=0) + new_alpha = torch.Tensor([self.alpha_score(self.x.size()[0])]).view(1, 1) + self.alphas = torch.cat((self.alphas, new_alpha), dim=0) + else: + self.x = x + self.y = y + self.alphas = self.default_alpha_score + self.fitted = False + + def add_data_point(self, x, y): + self.add_points(x, y) + def precompute(self): + if self.fitted == False: + self.Q = self.embed(self.x) + I = torch.eye(int(self.m)).double() + Z_ = self.Q.T @ self.Q + self.V = Z_ + (self.s**2) * self.lam * I + self.invV = torch.pinverse(self.V) + self.y_truncated = ( + self.y.view(-1) * (torch.abs(self.y) < self.alphas).view(-1).double() + ) + self.y_truncated = self.y_truncated.view(-1, 1) + self.fitted = True + else: + pass diff --git a/stpy/dimred/sri.py b/stpy/dimred/sri.py index 6904037..04f6d75 100644 --- a/stpy/dimred/sri.py +++ b/stpy/dimred/sri.py @@ -5,130 +5,138 @@ from sklearn.cluster import KMeans -class SRI(): - - def __init__(self): - """ - :param X: X values - :param y: response variables - :param relative: relative to number of samples - :param buckets: - """ - - def standardize(self, X): - (n, d) = X.size() - Sigma_x = np.cov(self.X.numpy().T) - E_x = np.mean(self.X.numpy(), axis=0) - Sigma_x_half_inv = np.linalg.pinv(scipy.linalg.sqrtm(Sigma_x)) - Z = (X.numpy() - np.outer(np.ones(n), E_x)) @ Sigma_x_half_inv - - return Sigma_x_half_inv, Z - - def slice_kmeans(self, y): - indices = [] - kmeans = KMeans(n_clusters=self.buckets).fit(y.numpy().reshape(-1, 1)) - - for label in range(self.buckets): - ind = kmeans.labels_ == label - indices.append(ind) - return indices - - def fit_sri(self, X, y, buckets=10): - self.X = X - self.y = y - self.buckets = buckets - (n, d) = self.X.size() - Sigma_x_half_inv, Z = 
self.standardize(self.X) - - if isinstance(self.buckets, int): - indices = self.slice_kmeans(self.y) - - zs = [] - ns = [] - for ind in indices: - if np.sum(ind) > 1: - z = np.mean(Z[ind, :].reshape(-1, d), axis=0) - ns.append(np.sum(ind)) - zs.append(z) - Zn = np.array(zs) - V = (Zn.T @ np.diag(ns) @ Zn) / self.buckets - - else: - raise AssertionError("Unknown bucketing rule.") - - lams, eignv = np.linalg.eig(V) - betas = Sigma_x_half_inv @ eignv - return torch.from_numpy(lams), torch.from_numpy(betas) - - def fit_save(self, X, y, buckets=10): - self.X = X - self.y = y - self.buckets = buckets - (n, d) = self.X.size() - Sigma_x_half_inv, Z = self.standardize(self.X) - - if isinstance(self.buckets, int): - indices = self.slice_kmeans(self.y) - - V = np.zeros(shape=(d, d)) - I = np.eye(d) - for ind in indices: - ns = np.sum(ind) - if ns > 1: - Covar_slice = np.cov(Z[ind, :].reshape(-1, d).T) - V = V + ((I - Covar_slice) @ (I - Covar_slice)) * (float(ns) / float(n)) - - else: - raise AssertionError("Unknown bucketing rule.") - - lams, eignv = np.linalg.eig(V) - betas = Sigma_x_half_inv @ eignv - return torch.from_numpy(lams), torch.from_numpy(betas) - - def gradient_design(self, d, k, nablaF, eps=10e-4): - Sigma = torch.eye(d).double() * eps - x0 = torch.rand(size=(k, d)).double() - subspace = nablaF(x0) - Sigma = Sigma + subspace.T @ subspace - return x0, Sigma, subspace - - def sample_dir(self, n, x0, subspace, eps=10e-4): - indices = np.arange(0, x0.size()[0], 1) - choice = np.random.choice(indices, n, replace=True) - magnitude = np.diag(np.random.randn(n)) - sample = x0.numpy()[choice] + magnitude @ subspace[choice].numpy() + eps * np.random.randn(n, d) - return torch.from_numpy(sample) +class SRI: + + def __init__(self): + """ + :param X: X values + :param y: response variables + :param relative: relative to number of samples + :param buckets: + """ + + def standardize(self, X): + (n, d) = X.size() + Sigma_x = np.cov(self.X.numpy().T) + E_x = np.mean(self.X.numpy(), axis=0) + Sigma_x_half_inv = np.linalg.pinv(scipy.linalg.sqrtm(Sigma_x)) + Z = (X.numpy() - np.outer(np.ones(n), E_x)) @ Sigma_x_half_inv + + return Sigma_x_half_inv, Z + + def slice_kmeans(self, y): + indices = [] + kmeans = KMeans(n_clusters=self.buckets).fit(y.numpy().reshape(-1, 1)) + + for label in range(self.buckets): + ind = kmeans.labels_ == label + indices.append(ind) + return indices + + def fit_sri(self, X, y, buckets=10): + self.X = X + self.y = y + self.buckets = buckets + (n, d) = self.X.size() + Sigma_x_half_inv, Z = self.standardize(self.X) + + if isinstance(self.buckets, int): + indices = self.slice_kmeans(self.y) + + zs = [] + ns = [] + for ind in indices: + if np.sum(ind) > 1: + z = np.mean(Z[ind, :].reshape(-1, d), axis=0) + ns.append(np.sum(ind)) + zs.append(z) + Zn = np.array(zs) + V = (Zn.T @ np.diag(ns) @ Zn) / self.buckets + + else: + raise AssertionError("Unknown bucketing rule.") + + lams, eignv = np.linalg.eig(V) + betas = Sigma_x_half_inv @ eignv + return torch.from_numpy(lams), torch.from_numpy(betas) + + def fit_save(self, X, y, buckets=10): + self.X = X + self.y = y + self.buckets = buckets + (n, d) = self.X.size() + Sigma_x_half_inv, Z = self.standardize(self.X) + + if isinstance(self.buckets, int): + indices = self.slice_kmeans(self.y) + + V = np.zeros(shape=(d, d)) + I = np.eye(d) + for ind in indices: + ns = np.sum(ind) + if ns > 1: + Covar_slice = np.cov(Z[ind, :].reshape(-1, d).T) + V = V + ((I - Covar_slice) @ (I - Covar_slice)) * ( + float(ns) / float(n) + ) + + else: + raise 
AssertionError("Unknown bucketing rule.") + + lams, eignv = np.linalg.eig(V) + betas = Sigma_x_half_inv @ eignv + return torch.from_numpy(lams), torch.from_numpy(betas) + + def gradient_design(self, d, k, nablaF, eps=10e-4): + Sigma = torch.eye(d).double() * eps + x0 = torch.rand(size=(k, d)).double() + subspace = nablaF(x0) + Sigma = Sigma + subspace.T @ subspace + return x0, Sigma, subspace + + def sample_dir(self, n, x0, subspace, eps=10e-4): + indices = np.arange(0, x0.size()[0], 1) + choice = np.random.choice(indices, n, replace=True) + magnitude = np.diag(np.random.randn(n)) + sample = ( + x0.numpy()[choice] + + magnitude @ subspace[choice].numpy() + + eps * np.random.randn(n, d) + ) + return torch.from_numpy(sample) if __name__ == "__main__": - d = 3 - p = 2 + d = 3 + p = 2 - sigma = 0. - A = torch.from_numpy(np.random.randn(d, p)) - A = torch.from_numpy(np.eye(d, p)) - print(A) - # exampel function - f = lambda x: torch.sum((x @ A) ** 2, dim=1) + sigma * torch.randn(x.size()[0], dtype=torch.double) - f_no_noise = lambda x: torch.sum((x @ A) ** 2, dim=1) + sigma = 0.0 + A = torch.from_numpy(np.random.randn(d, p)) + A = torch.from_numpy(np.eye(d, p)) + print(A) + # exampel function + f = lambda x: torch.sum((x @ A) ** 2, dim=1) + sigma * torch.randn( + x.size()[0], dtype=torch.double + ) + f_no_noise = lambda x: torch.sum((x @ A) ** 2, dim=1) - nablaF = lambda x: x @ A @ A.T + nablaF = lambda x: x @ A @ A.T - DimRed = SRI() - N = 100 - x0, Sigma, subspace = DimRed.gradient_design(d, d, nablaF) - X0 = DimRed.sample_dir(N, x0, subspace) - y0 = f(X0) + DimRed = SRI() + N = 100 + x0, Sigma, subspace = DimRed.gradient_design(d, d, nablaF) + X0 = DimRed.sample_dir(N, x0, subspace) + y0 = f(X0) - plt.scatter(X0[:, 0], X0[:, 1], c=y0.view(-1)) - plt.show() + plt.scatter(X0[:, 0], X0[:, 1], c=y0.view(-1)) + plt.show() - lams, betas = DimRed.fit_sri(X0, y0, buckets=20) + lams, betas = DimRed.fit_sri(X0, y0, buckets=20) - print(lams / torch.sum(lams)) - print(betas) + print(lams / torch.sum(lams)) + print(betas) - lams2, betas2 = DimRed.fit_save(X0, y0, buckets=20) + lams2, betas2 = DimRed.fit_save(X0, y0, buckets=20) - print(lams2 / torch.sum(lams2)) - print(betas2) + print(lams2 / torch.sum(lams2)) + print(betas2) diff --git a/stpy/embeddings/bernstein_embedding.py b/stpy/embeddings/bernstein_embedding.py index 3653911..2da90a5 100644 --- a/stpy/embeddings/bernstein_embedding.py +++ b/stpy/embeddings/bernstein_embedding.py @@ -7,105 +7,109 @@ class BernsteinEmbedding(PositiveEmbedding): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def basis_fun(self, x, j): # 1d - """ - Return the value of basis function \phi_j(x) - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - lim = [self.interval[0], self.interval[1]] - c = np.zeros(shape=(self.m, 1)) - c[j] = 1 - bp = BPoly(c, lim, extrapolate=False) - res = bp(x.numpy()) - value = torch.from_numpy(np.nan_to_num(res)) - return value - - # return torch.from_numpy(bp(x.numpy())) - - def get_polynomial(self, j): - if self.d == 1: - lim = [self.interval[0], self.interval[1]] - c = np.zeros(shape=(self.m, 1)) - c[j] = 1 - roots = PPoly.from_bernstein_basis(BPoly(c, lim)).roots() - poly = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots), - domain=np.array(lim)) - - elif self.d == 2: - lim = [self.interval[0], self.interval[1]] - k = j // self.m - l = j % self.m - c = np.zeros(shape=(self.m, 1)) - c[k] = 1 - bp 
= BPoly(c, lim) - c = np.zeros(shape=(self.m, 1)) - c[l] = 1 - bp2 = BPoly(c, lim) - roots1 = PPoly.from_bernstein_basis(bp).roots() - roots2 = PPoly.from_bernstein_basis(bp2).roots() - poly1 = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots1), - domain=np.array(lim)) - poly2 = np.polynomial.polynomial.Polynomial(np.polynomial.polynomial.polyfromroots(roots2), - domain=np.array(lim)) - poly = poly1 * poly2 - return poly - - def integral(self, S): - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() - - if self.d == 1: - a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) - for j in range(self.get_m()): - lim = [self.interval[0], self.interval[1]] - c = np.zeros(shape=(self.m, 1)) - c[j] = 1 - bp = BPoly(c, lim) - xa = np.maximum(self.interval[0], a) - xb = np.minimum(self.interval[1], b) - psi[j] = bp.integrate(xa, xb, extrapolate=False) - - elif self.d == 2: - xa, xb = S.bounds[0, 0], S.bounds[0, 1] - ya, yb = S.bounds[1, 0], S.bounds[1, 1] - for j in range(self.get_m()): - lim = [self.interval[0], self.interval[1]] - - k = j // self.m - l = j % self.m - - c = np.zeros(shape=(self.m, 1)) - c[k] = 1 - bp = BPoly(c, lim) - vol1 = bp.integrate(xa, xb) - c = np.zeros(shape=(self.m, 1)) - c[l] = 1 - bp = BPoly(c, lim) - vol2 = bp.integrate(ya, yb) - psi[j] = vol1 * vol2 - - Gamma_half = self.cov() - return psi @ Gamma_half - - def product_integral(self, S): - m = self.get_m() - Psi = torch.zeros(size=(m, m)).double() - a, b = S.bounds[0, 0], S.bounds[0, 1] - for i in range(m): - for j in range(m): - P = self.get_polynomial(i) * self.get_polynomial(j) - new_p = P.integ() - xb = np.minimum(new_p.domain[1], b) - xa = np.maximum(new_p.domain[0], a) - Psi[i, j] = new_p(xb) - new_p(xa) - print(i, j, Psi[i, j]) - Gamma_half = self.cov() - return Gamma_half @ Psi @ Gamma_half.T + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def basis_fun(self, x, j): # 1d + """ + Return the value of basis function \phi_j(x) + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + lim = [self.interval[0], self.interval[1]] + c = np.zeros(shape=(self.m, 1)) + c[j] = 1 + bp = BPoly(c, lim, extrapolate=False) + res = bp(x.numpy()) + value = torch.from_numpy(np.nan_to_num(res)) + return value + + # return torch.from_numpy(bp(x.numpy())) + + def get_polynomial(self, j): + if self.d == 1: + lim = [self.interval[0], self.interval[1]] + c = np.zeros(shape=(self.m, 1)) + c[j] = 1 + roots = PPoly.from_bernstein_basis(BPoly(c, lim)).roots() + poly = np.polynomial.polynomial.Polynomial( + np.polynomial.polynomial.polyfromroots(roots), domain=np.array(lim) + ) + + elif self.d == 2: + lim = [self.interval[0], self.interval[1]] + k = j // self.m + l = j % self.m + c = np.zeros(shape=(self.m, 1)) + c[k] = 1 + bp = BPoly(c, lim) + c = np.zeros(shape=(self.m, 1)) + c[l] = 1 + bp2 = BPoly(c, lim) + roots1 = PPoly.from_bernstein_basis(bp).roots() + roots2 = PPoly.from_bernstein_basis(bp2).roots() + poly1 = np.polynomial.polynomial.Polynomial( + np.polynomial.polynomial.polyfromroots(roots1), domain=np.array(lim) + ) + poly2 = np.polynomial.polynomial.Polynomial( + np.polynomial.polynomial.polyfromroots(roots2), domain=np.array(lim) + ) + poly = poly1 * poly2 + return poly + + def integral(self, S): + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() + + if self.d == 1: + a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) + for j in range(self.get_m()): + lim = 
[self.interval[0], self.interval[1]] + c = np.zeros(shape=(self.m, 1)) + c[j] = 1 + bp = BPoly(c, lim) + xa = np.maximum(self.interval[0], a) + xb = np.minimum(self.interval[1], b) + psi[j] = bp.integrate(xa, xb, extrapolate=False) + + elif self.d == 2: + xa, xb = S.bounds[0, 0], S.bounds[0, 1] + ya, yb = S.bounds[1, 0], S.bounds[1, 1] + for j in range(self.get_m()): + lim = [self.interval[0], self.interval[1]] + + k = j // self.m + l = j % self.m + + c = np.zeros(shape=(self.m, 1)) + c[k] = 1 + bp = BPoly(c, lim) + vol1 = bp.integrate(xa, xb) + c = np.zeros(shape=(self.m, 1)) + c[l] = 1 + bp = BPoly(c, lim) + vol2 = bp.integrate(ya, yb) + psi[j] = vol1 * vol2 + + Gamma_half = self.cov() + return psi @ Gamma_half + + def product_integral(self, S): + m = self.get_m() + Psi = torch.zeros(size=(m, m)).double() + a, b = S.bounds[0, 0], S.bounds[0, 1] + for i in range(m): + for j in range(m): + P = self.get_polynomial(i) * self.get_polynomial(j) + new_p = P.integ() + xb = np.minimum(new_p.domain[1], b) + xa = np.maximum(new_p.domain[0], a) + Psi[i, j] = new_p(xb) - new_p(xa) + print(i, j, Psi[i, j]) + Gamma_half = self.cov() + return Gamma_half @ Psi @ Gamma_half.T + # def cov(self, inverse = False): # if self.precomp == False: @@ -127,272 +131,299 @@ def product_integral(self, S): class BernsteinSplinesOverlapping(PositiveEmbedding): - def __init__(self, *args, degree=4, **kwargs): - super().__init__(*args, **kwargs) - self.degree = degree - - def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d - """ - Return the value of basis function \phi_j(x) - - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - - j = q // (self.degree // 2) - k = q % (self.degree // 2) - - dm = (self.interval[1] - self.interval[0]) / ((self.m // (self.degree // 2))) # delta m - tj = self.interval[0] + j * dm - lim = [tj, tj + 2 * dm] - - c = np.zeros(shape=(self.degree // 2, 1)) - c[k] = 1. - bp = BPoly(c, lim) - res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate) - - if extrapolate == False: - mask = x.numpy() == (tj + dm / 2) - res[mask] = np.nan - value = torch.from_numpy(np.nan_to_num(res)) - return value - - def integral(self, S): - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() - - if self.d == 1: - a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) - for q in range(self.get_m()): - j = q // self.degree - k = q % self.degree - - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - tj = self.interval[0] + j * dm - lim = [tj, tj + dm] - c = np.zeros(shape=(self.degree, 1)) - c[k] = 1. - bp = BPoly(c, lim) - xa = np.maximum(tj, a) - xb = np.minimum(tj + dm, b) - psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False)) - - elif self.d == 2: - xa, xb = S.bounds[0, 0], S.bounds[0, 1] - ya, yb = S.bounds[1, 0], S.bounds[1, 1] - for z in range(self.get_m()): - q1 = z // self.m - q2 = z % self.m - - j1 = q1 // self.degree - k1 = q1 % self.degree - j2 = q2 // self.degree - k2 = q2 % self.degree - - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - tj1 = self.interval[0] + j1 * dm - tj2 = self.interval[0] + j2 * dm - lim1 = [tj1, tj1 + dm] - lim2 = [tj2, tj2 + dm] - c = np.zeros(shape=(self.degree, 1)) - c[k1] = 1. - bp = BPoly(c, lim1) - vol1 = bp.integrate(xa, xb) - c = np.zeros(shape=(self.degree, 1)) - c[k2] = 1. 
- bp = BPoly(c, lim2) - vol2 = bp.integrate(ya, yb) - psi[z] = vol1 * vol2 - - Gamma_half = self.cov() - return psi @ Gamma_half + def __init__(self, *args, degree=4, **kwargs): + super().__init__(*args, **kwargs) + self.degree = degree + + def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + + j = q // (self.degree // 2) + k = q % (self.degree // 2) + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // (self.degree // 2)) + ) # delta m + tj = self.interval[0] + j * dm + lim = [tj, tj + 2 * dm] + + c = np.zeros(shape=(self.degree // 2, 1)) + c[k] = 1.0 + bp = BPoly(c, lim) + res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate) + + if extrapolate == False: + mask = x.numpy() == (tj + dm / 2) + res[mask] = np.nan + value = torch.from_numpy(np.nan_to_num(res)) + return value + + def integral(self, S): + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() + + if self.d == 1: + a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) + for q in range(self.get_m()): + j = q // self.degree + k = q % self.degree + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + tj = self.interval[0] + j * dm + lim = [tj, tj + dm] + c = np.zeros(shape=(self.degree, 1)) + c[k] = 1.0 + bp = BPoly(c, lim) + xa = np.maximum(tj, a) + xb = np.minimum(tj + dm, b) + psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False)) + + elif self.d == 2: + xa, xb = S.bounds[0, 0], S.bounds[0, 1] + ya, yb = S.bounds[1, 0], S.bounds[1, 1] + for z in range(self.get_m()): + q1 = z // self.m + q2 = z % self.m + + j1 = q1 // self.degree + k1 = q1 % self.degree + j2 = q2 // self.degree + k2 = q2 % self.degree + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + tj1 = self.interval[0] + j1 * dm + tj2 = self.interval[0] + j2 * dm + lim1 = [tj1, tj1 + dm] + lim2 = [tj2, tj2 + dm] + c = np.zeros(shape=(self.degree, 1)) + c[k1] = 1.0 + bp = BPoly(c, lim1) + vol1 = bp.integrate(xa, xb) + c = np.zeros(shape=(self.degree, 1)) + c[k2] = 1.0 + bp = BPoly(c, lim2) + vol2 = bp.integrate(ya, yb) + psi[z] = vol1 * vol2 + + Gamma_half = self.cov() + return psi @ Gamma_half class BernsteinSplinesEmbedding(PositiveEmbedding): - def __init__(self, *args, degree=4, derivatives=2, **kwargs): - super().__init__(*args, **kwargs) - self.degree = degree - self.derivatives = derivatives - - # def basis_fun(self, x, j, k, derivative = 0, extrapolate = False): #1d - def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d - """ - Return the value of basis function \phi_j(x) - - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - - j = q // self.degree - k = q % self.degree - - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - tj = self.interval[0] + j * dm - - lim = [tj, tj + dm] - c = np.zeros(shape=(self.degree, 1)) - c[k] = 1. 
- bp = BPoly(c, lim) - res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate) - - if extrapolate == False: - mask = x.numpy() == (tj + dm) - res[mask] = np.nan - value = torch.from_numpy(np.nan_to_num(res)) - return value - - def embed_internal_derivative(self, x, l=1, extrapolate=False): - if self.d == 1: - out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) - for j in range(0, self.m, 1): - out[:, j] = self.basis_fun(x, j, derivative=l, extrapolate=extrapolate).view(-1) - return out - - def get_constraints(self): - s = self.m ** self.d - - # positivity constraints - l = np.full(s, self.b) - u = np.full(s, self.B) - I = np.identity(s) - - # pointwise fix - Zs = [] - vs = [] - for j in range(self.derivatives + 1): - no_nodes = (self.m // self.degree) - 1 - Z = np.zeros(shape=(no_nodes, s)) - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - - for i in range(no_nodes): - ti = torch.from_numpy(np.array(self.interval[0] + (i + 1) * dm)).view(1, -1) - Z[i, i * self.degree:i * self.degree + self.degree] = self.embed_internal_derivative(ti, l=j, - extrapolate=True).numpy().reshape( - -1)[i * self.degree:i * self.degree + self.degree] - Z[i, (i + 1) * self.degree:(i + 1) * self.degree + self.degree] = -self.embed_internal_derivative(ti, - l=j, - extrapolate=True).numpy().reshape( - -1)[(i + 1) * self.degree:(i + 1) * self.degree + self.degree] - v = np.zeros(self.m // self.degree - 1) - Zs.append(Z) - vs.append(v) - - Lambda = np.concatenate([I] + Zs) - l = np.concatenate([l] + vs) - u = np.concatenate([u] + vs) - return (l, Lambda, u) - - def integral(self, S): - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() - - if self.d == 1: - a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) - for q in range(self.get_m()): - j = q // self.degree - k = q % self.degree - - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - tj = self.interval[0] + j * dm - lim = [tj, tj + dm] - c = np.zeros(shape=(self.degree, 1)) - c[k] = 1. - bp = BPoly(c, lim) - xa = np.maximum(tj, a) - xb = np.minimum(tj + dm, b) - psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False)) - - elif self.d == 2: - xa, xb = S.bounds[0, 0], S.bounds[0, 1] - ya, yb = S.bounds[1, 0], S.bounds[1, 1] - for z in range(self.get_m()): - q1 = z // self.m - q2 = z % self.m - - j1 = q1 // self.degree - k1 = q1 % self.degree - j2 = q2 // self.degree - k2 = q2 % self.degree - - dm = (self.interval[1] - self.interval[0]) / ((self.m // self.degree)) # delta m - tj1 = self.interval[0] + j1 * dm - tj2 = self.interval[0] + j2 * dm - lim1 = [tj1, tj1 + dm] - lim2 = [tj2, tj2 + dm] - c = np.zeros(shape=(self.degree, 1)) - c[k1] = 1. - bp = BPoly(c, lim1) - vol1 = bp.integrate(xa, xb) - c = np.zeros(shape=(self.degree, 1)) - c[k2] = 1. 
- bp = BPoly(c, lim2) - vol2 = bp.integrate(ya, yb) - psi[z] = vol1 * vol2 - - Gamma_half = self.cov() - return psi @ Gamma_half - - def product_integral(self, S): - pass + def __init__(self, *args, degree=4, derivatives=2, **kwargs): + super().__init__(*args, **kwargs) + self.degree = degree + self.derivatives = derivatives + + # def basis_fun(self, x, j, k, derivative = 0, extrapolate = False): #1d + def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + + j = q // self.degree + k = q % self.degree + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + tj = self.interval[0] + j * dm + + lim = [tj, tj + dm] + c = np.zeros(shape=(self.degree, 1)) + c[k] = 1.0 + bp = BPoly(c, lim) + res = bp(x.numpy(), nu=derivative, extrapolate=extrapolate) + + if extrapolate == False: + mask = x.numpy() == (tj + dm) + res[mask] = np.nan + value = torch.from_numpy(np.nan_to_num(res)) + return value + + def embed_internal_derivative(self, x, l=1, extrapolate=False): + if self.d == 1: + out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) + for j in range(0, self.m, 1): + out[:, j] = self.basis_fun( + x, j, derivative=l, extrapolate=extrapolate + ).view(-1) + return out + + def get_constraints(self): + s = self.m**self.d + + # positivity constraints + l = np.full(s, self.b) + u = np.full(s, self.B) + I = np.identity(s) + + # pointwise fix + Zs = [] + vs = [] + for j in range(self.derivatives + 1): + no_nodes = (self.m // self.degree) - 1 + Z = np.zeros(shape=(no_nodes, s)) + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + + for i in range(no_nodes): + ti = torch.from_numpy(np.array(self.interval[0] + (i + 1) * dm)).view( + 1, -1 + ) + Z[i, i * self.degree : i * self.degree + self.degree] = ( + self.embed_internal_derivative(ti, l=j, extrapolate=True) + .numpy() + .reshape(-1)[i * self.degree : i * self.degree + self.degree] + ) + Z[i, (i + 1) * self.degree : (i + 1) * self.degree + self.degree] = ( + -self.embed_internal_derivative(ti, l=j, extrapolate=True) + .numpy() + .reshape(-1)[ + (i + 1) * self.degree : (i + 1) * self.degree + self.degree + ] + ) + v = np.zeros(self.m // self.degree - 1) + Zs.append(Z) + vs.append(v) + + Lambda = np.concatenate([I] + Zs) + l = np.concatenate([l] + vs) + u = np.concatenate([u] + vs) + return (l, Lambda, u) + + def integral(self, S): + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() + + if self.d == 1: + a, b = float(S.bounds[0, 0]), float(S.bounds[0, 1]) + for q in range(self.get_m()): + j = q // self.degree + k = q % self.degree + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + tj = self.interval[0] + j * dm + lim = [tj, tj + dm] + c = np.zeros(shape=(self.degree, 1)) + c[k] = 1.0 + bp = BPoly(c, lim) + xa = np.maximum(tj, a) + xb = np.minimum(tj + dm, b) + psi[q] = np.nan_to_num(bp.integrate(xa, xb, extrapolate=False)) + + elif self.d == 2: + xa, xb = S.bounds[0, 0], S.bounds[0, 1] + ya, yb = S.bounds[1, 0], S.bounds[1, 1] + for z in range(self.get_m()): + q1 = z // self.m + q2 = z % self.m + + j1 = q1 // self.degree + k1 = q1 % self.degree + j2 = q2 // self.degree + k2 = q2 % self.degree + + dm = (self.interval[1] - self.interval[0]) / ( + (self.m // self.degree) + ) # delta m + tj1 = self.interval[0] + j1 * dm + tj2 = 
self.interval[0] + j2 * dm + lim1 = [tj1, tj1 + dm] + lim2 = [tj2, tj2 + dm] + c = np.zeros(shape=(self.degree, 1)) + c[k1] = 1.0 + bp = BPoly(c, lim1) + vol1 = bp.integrate(xa, xb) + c = np.zeros(shape=(self.degree, 1)) + c[k2] = 1.0 + bp = BPoly(c, lim2) + vol2 = bp.integrate(ya, yb) + psi[z] = vol1 * vol2 + + Gamma_half = self.cov() + return psi @ Gamma_half + + def product_integral(self, S): + pass if __name__ == "__main__": - from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.helpers.helper import interval - import matplotlib.pyplot as plt - from stpy.kernels import KernelFunction - from stpy.embeddings.bump_bases import FaberSchauderEmbedding - - d = 1 - m = 32 - n = 64 - N = 10 - - sqrtbeta = 2 - s = 0.001 - b = 0.0 - B = 200 - - gamma = 0.1 - kernel_object = KernelFunction(gamma=gamma) - - # Emb = BernsteinSplinesEmbedding(d, m,kernel_object=kernel_object, offset=0.5,b=b,B=B,s = s) - EmbBern = BernsteinEmbedding(d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s) - EmbFaber = FaberSchauderEmbedding(d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s) - GP = GaussianProcess(d=d, s=s, kernel=kernel_object) - # GPNyst = KernelizedFeatures(embedding=EmbNys.GP,m = m, s = s,) - - xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1)) - x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1) - - F_true = lambda x: torch.sin(x) ** 2 - 0.1 - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - - # Emb.fit_gp(x,y) - EmbBern.fit(x, y) - EmbFaber.fit(x, y) - - GP.fit_gp(x, y) - - # mu = Emb.mean_std(xtest) - mu_true, _ = GP.mean_std(xtest) - mu_bern = EmbBern.mean_std(xtest) - mu_faber = EmbFaber.mean_std(xtest) - - plt.plot(xtest, xtest * 0 + b, 'k--') - # plt.plot(xtest, xtest * 0 + B, 'k--') - - plt.plot(xtest, F_true(xtest), 'r', label='true') - # plt.plot(xtest,mu_true_nyst,color = 'lightblue', label = 'Nystrom') - plt.plot(xtest, mu_true, 'b--', label='no-constraints') - - plt.plot(x, y, 'ro') - # plt.plot(xtest, mu, 'g-x', label = 'splines Bernstein') - plt.plot(xtest, mu_bern, 'y-o', label='Bernstein basis') - plt.plot(xtest, mu_faber, 'g-o', label='Faber basis') - plt.legend() - plt.show() + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.helpers.helper import interval + import matplotlib.pyplot as plt + from stpy.kernels import KernelFunction + from stpy.embeddings.bump_bases import FaberSchauderEmbedding + + d = 1 + m = 32 + n = 64 + N = 10 + + sqrtbeta = 2 + s = 0.001 + b = 0.0 + B = 200 + + gamma = 0.1 + kernel_object = KernelFunction(gamma=gamma) + + # Emb = BernsteinSplinesEmbedding(d, m,kernel_object=kernel_object, offset=0.5,b=b,B=B,s = s) + EmbBern = BernsteinEmbedding( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + EmbFaber = FaberSchauderEmbedding( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + GP = GaussianProcess(d=d, s=s, kernel=kernel_object) + # GPNyst = KernelizedFeatures(embedding=EmbNys.GP,m = m, s = s,) + + xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1)) + x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1) + + F_true = lambda x: torch.sin(x) ** 2 - 0.1 + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) + + # Emb.fit_gp(x,y) + EmbBern.fit(x, y) + EmbFaber.fit(x, y) + + GP.fit_gp(x, y) + + # mu = Emb.mean_std(xtest) + mu_true, _ = GP.mean_std(xtest) + mu_bern = EmbBern.mean_std(xtest) + mu_faber = EmbFaber.mean_std(xtest) + + 
plt.plot(xtest, xtest * 0 + b, "k--") + # plt.plot(xtest, xtest * 0 + B, 'k--') + + plt.plot(xtest, F_true(xtest), "r", label="true") + # plt.plot(xtest,mu_true_nyst,color = 'lightblue', label = 'Nystrom') + plt.plot(xtest, mu_true, "b--", label="no-constraints") + + plt.plot(x, y, "ro") + # plt.plot(xtest, mu, 'g-x', label = 'splines Bernstein') + plt.plot(xtest, mu_bern, "y-o", label="Bernstein basis") + plt.plot(xtest, mu_faber, "g-o", label="Faber basis") + plt.legend() + plt.show() diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py index 3e71555..587daf4 100644 --- a/stpy/embeddings/bump_bases.py +++ b/stpy/embeddings/bump_bases.py @@ -11,120 +11,119 @@ class TriangleEmbedding(PositiveEmbedding): - def __init__(self, *args, **kwargs): - - super().__init__(*args, **kwargs) - - def basis_fun(self, x, j): - """ - Return the value of basis function \phi_j(x) - - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - - dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m - tj = self.interval[0] + (j) * dm - res = 1 - torch.abs((x - tj) / dm) - res[res < 0] = 0 - return res - - def integrate_1d(self, a, b, tj, dm): - """ - :param a: from - :param b: to - :param tj: node - :param dm: width - :return: - """ - if a <= tj - dm and b >= tj + dm: # contained - vol = 1. * dm - - elif a >= tj + dm or b <= tj - dm: # outside - vol = 0. - - elif a <= tj - dm and b >= tj and b <= tj + dm: # a out , b inside second half - res = max(1. - np.abs((b - tj) / dm), 0) - vol = dm * 0.5 + (b - tj) * (1. + res) / 2. - - elif b >= tj + dm and a <= tj and a >= tj - dm: # b out, a inside first half - res = max(1. - np.abs((a - tj) / dm), 0) - vol = dm * 0.5 + (tj - a) * (1. + res) / 2. - - elif a <= tj - dm and b <= tj and b >= tj - dm: # a out, b inside first half - res = max(1. - np.abs((b - tj) / dm), 0) - vol = 0.5 * (b - (tj - dm)) * res - - elif b >= tj + dm and a >= tj and a <= tj + dm: # b out, a inside second half - res = max(1. - np.abs((a - tj) / dm), 0) - vol = 0.5 * ((tj + dm) - a) * res - - - else: # inside - resa = max(1. - np.abs((a - tj) / dm), 0) - resb = max(1. - np.abs((b - tj) / dm), 0) - - if b <= tj: - vol = (b - a) * (resb + resa) / 2. - elif a >= tj: - vol = (b - a) * (resa + resb) / 2. - else: - vol = (tj - a) * (1 + resa) / 2. + (b - tj) * (resb + 1) / 2. 
- - return vol - - def integral(self, S): - """ - Integrate the Phi(x) over S - :param S: borel set - :return: - """ - if S in self.procomp_integrals.keys(): - return self.procomp_integrals[S] - - - else: - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() - if S.type == "box": - if self.d == 1: - dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m - a, b = S.bounds[0, 0], S.bounds[0, 1] - for j in range(self.get_m()): - tj = self.interval[0] + j * dm - vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm) - psi[j] = vol - - elif self.d == 2: - dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m - - xa, xb = S.bounds[0, 0], S.bounds[0, 1] - ya, yb = S.bounds[1, 0], S.bounds[1, 1] - - for j in range(self.get_m()): - tj = self.interval[0] + (j % self.m) * dm - tk = self.interval[0] + (j // self.m) * dm - - # triangle center point - # center_point = torch.Tensor( [tj,tk]).view(-1,1) - vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm) - vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm) - psi[j] = vol * vol2 - # if torch.sum(S.is_inside(center_point)): - # psi[j] = (dm**2)/3. - else: - raise ("more than 2D not implemented.") - - elif S.type == "round": - weights, nodes = S.return_legendre_discretization(30) - vals = self.embed_internal(nodes) - psi = weights.view(1, -1) @ vals - - Gamma_half = self.cov() - emb = psi @ Gamma_half - self.procomp_integrals[S] = emb - return emb + def __init__(self, *args, **kwargs): + + super().__init__(*args, **kwargs) + + def basis_fun(self, x, j): + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + + dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m + tj = self.interval[0] + (j) * dm + res = 1 - torch.abs((x - tj) / dm) + res[res < 0] = 0 + return res + + def integrate_1d(self, a, b, tj, dm): + """ + :param a: from + :param b: to + :param tj: node + :param dm: width + :return: + """ + if a <= tj - dm and b >= tj + dm: # contained + vol = 1.0 * dm + + elif a >= tj + dm or b <= tj - dm: # outside + vol = 0.0 + + elif a <= tj - dm and b >= tj and b <= tj + dm: # a out , b inside second half + res = max(1.0 - np.abs((b - tj) / dm), 0) + vol = dm * 0.5 + (b - tj) * (1.0 + res) / 2.0 + + elif b >= tj + dm and a <= tj and a >= tj - dm: # b out, a inside first half + res = max(1.0 - np.abs((a - tj) / dm), 0) + vol = dm * 0.5 + (tj - a) * (1.0 + res) / 2.0 + + elif a <= tj - dm and b <= tj and b >= tj - dm: # a out, b inside first half + res = max(1.0 - np.abs((b - tj) / dm), 0) + vol = 0.5 * (b - (tj - dm)) * res + + elif b >= tj + dm and a >= tj and a <= tj + dm: # b out, a inside second half + res = max(1.0 - np.abs((a - tj) / dm), 0) + vol = 0.5 * ((tj + dm) - a) * res + + else: # inside + resa = max(1.0 - np.abs((a - tj) / dm), 0) + resb = max(1.0 - np.abs((b - tj) / dm), 0) + + if b <= tj: + vol = (b - a) * (resb + resa) / 2.0 + elif a >= tj: + vol = (b - a) * (resa + resb) / 2.0 + else: + vol = (tj - a) * (1 + resa) / 2.0 + (b - tj) * (resb + 1) / 2.0 + + return vol + + def integral(self, S): + """ + Integrate the Phi(x) over S + :param S: borel set + :return: + """ + if S in self.procomp_integrals.keys(): + return self.procomp_integrals[S] + + else: + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() + if S.type == "box": + if self.d == 1: + dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m + a, b = S.bounds[0, 0], 
S.bounds[0, 1] + for j in range(self.get_m()): + tj = self.interval[0] + j * dm + vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm) + psi[j] = vol + + elif self.d == 2: + dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m + + xa, xb = S.bounds[0, 0], S.bounds[0, 1] + ya, yb = S.bounds[1, 0], S.bounds[1, 1] + + for j in range(self.get_m()): + tj = self.interval[0] + (j % self.m) * dm + tk = self.interval[0] + (j // self.m) * dm + + # triangle center point + # center_point = torch.Tensor( [tj,tk]).view(-1,1) + vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm) + vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm) + psi[j] = vol * vol2 + # if torch.sum(S.is_inside(center_point)): + # psi[j] = (dm**2)/3. + else: + raise ("more than 2D not implemented.") + + elif S.type == "round": + weights, nodes = S.return_legendre_discretization(30) + vals = self.embed_internal(nodes) + psi = weights.view(1, -1) @ vals + + Gamma_half = self.cov() + emb = psi @ Gamma_half + self.procomp_integrals[S] = emb + return emb + # def product_integral(self, S): # assert( S.d == self.d) @@ -145,286 +144,303 @@ def integral(self, S): class FaberSchauderEmbedding(TriangleEmbedding): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - if int(np.log2(self.m)) != np.log2(self.m): - raise AssertionError("This basis works only with log_2(n) is integer.") - - def basis_fun(self, x, j): - """ - Return the value of basis function \phi_j(x) - - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - if j == 0: - res = x * 0 + 1 - elif j == 1: - dm = (self.interval[1] - self.interval[0]) / 2 # delta m - res = 1 - torch.abs((x) / dm) - res[res < 0] = 0 - else: - level = np.floor(np.log2(j)) - start = 2 ** level - dm = (self.interval[1] - self.interval[0]) / (2 * start) - tj = self.interval[0] + (j - start) * 2 * dm + dm - res = 1 - torch.abs((x - tj) / dm) - res[res < 0] = 0 - return res - - def integral(self, S): - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() - - if self.d == 1: - a, b = S.bounds[0, 0], S.bounds[0, 1] - for j in range(self.get_m()): - if j == 0: - vol = (b - a) - elif j == 1: - dm = (self.interval[1] - self.interval[0]) / 2 # delta m - vol = self.integrate_1d(a.numpy(), b.numpy(), 0, dm) - else: - level = np.floor(np.log2(j)) - start = 2 ** level - dm = (self.interval[1] - self.interval[0]) / (2 * start) - tj = self.interval[0] + (j - start) * 2 * dm + dm - vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm) - psi[j] = vol - return psi - - def hierarchical_mask(self): - mask = [0] - for i in range(int(np.log2(self.m))): - for j in range(2**i): - mask.append(i+1) - return torch.Tensor(mask) - - def product_integral(self): - raise NotImplementedError("Not implemented.") - pass + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if int(np.log2(self.m)) != np.log2(self.m): + raise AssertionError("This basis works only with log_2(n) is integer.") + + def basis_fun(self, x, j): + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + if j == 0: + res = x * 0 + 1 + elif j == 1: + dm = (self.interval[1] - self.interval[0]) / 2 # delta m + res = 1 - torch.abs((x) / dm) + res[res < 0] = 0 + else: + level = np.floor(np.log2(j)) + start = 2**level + dm = (self.interval[1] - self.interval[0]) / (2 * start) + tj = 
self.interval[0] + (j - start) * 2 * dm + dm + res = 1 - torch.abs((x - tj) / dm) + res[res < 0] = 0 + return res + + def integral(self, S): + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() + + if self.d == 1: + a, b = S.bounds[0, 0], S.bounds[0, 1] + for j in range(self.get_m()): + if j == 0: + vol = b - a + elif j == 1: + dm = (self.interval[1] - self.interval[0]) / 2 # delta m + vol = self.integrate_1d(a.numpy(), b.numpy(), 0, dm) + else: + level = np.floor(np.log2(j)) + start = 2**level + dm = (self.interval[1] - self.interval[0]) / (2 * start) + tj = self.interval[0] + (j - start) * 2 * dm + dm + vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm) + psi[j] = vol + return psi + + def hierarchical_mask(self): + mask = [0] + for i in range(int(np.log2(self.m))): + for j in range(2**i): + mask.append(i + 1) + return torch.Tensor(mask) + + def product_integral(self): + raise NotImplementedError("Not implemented.") + pass class KuhnExponentialEmbedding(PositiveEmbedding): - """ - Basis from: Covering numbers of Gaussian reproducing kernel Hilbert spaces - by Thomas Kuhn + """ + Basis from: Covering numbers of Gaussian reproducing kernel Hilbert spaces + by Thomas Kuhn - """ + """ - def __init__(self, *args, gamma=0.1, **kwargs): - super().__init__(self, *args, **kwargs) - self.gamma = gamma + def __init__(self, *args, gamma=0.1, **kwargs): + super().__init__(self, *args, **kwargs) + self.gamma = gamma - def basis_fun(self, x, j): - k = np.exp(j / 2 * np.log(1. / self.gamma) - (j / 2) * scipy.special.gammaln(j + 1)) - res = k * (x ** j) * torch.exp(- (x ** 2) / (2 * self.gamma ** 2)) - mask1 = x < 0 - mask2 = x > 1 - res[mask1] = 0. - res[mask2] = 0. - return res + def basis_fun(self, x, j): + k = np.exp( + j / 2 * np.log(1.0 / self.gamma) - (j / 2) * scipy.special.gammaln(j + 1) + ) + res = k * (x**j) * torch.exp(-(x**2) / (2 * self.gamma**2)) + mask1 = x < 0 + mask2 = x > 1 + res[mask1] = 0.0 + res[mask2] = 0.0 + return res class CustomHaarBumps(PositiveEmbedding): - """ + """ - Custom Haar basis that cover different sized pockets of domain + Custom Haar basis that cover different sized pockets of domain - """ + """ - # def __init__(self, *args, **kwargs): - # super().__init__(self,*args, **kwargs) - # nodes = None - # widths = None - # self.nodes = nodes - # self.widths = widths + # def __init__(self, *args, **kwargs): + # super().__init__(self,*args, **kwargs) + # nodes = None + # widths = None + # self.nodes = nodes + # self.widths = widths - def __init__(self, d, m, nodes, widths, weights, **kwargs): - super().__init__(d, m, **kwargs) - self.nodes = nodes - self.widths = widths - self.weights = weights + def __init__(self, d, m, nodes, widths, weights, **kwargs): + super().__init__(d, m, **kwargs) + self.nodes = nodes + self.widths = widths + self.weights = weights - def basis_fun(self, x, j): + def basis_fun(self, x, j): - if self.nodes is None or self.widths is None: - super().basis_fun(x, j) - else: - mask = np.abs(x - self.nodes[j]) < self.widths[j] - out = x * 0 - out[mask] = self.weights[j] - return out + if self.nodes is None or self.widths is None: + super().basis_fun(x, j) + else: + mask = np.abs(x - self.nodes[j]) < self.widths[j] + out = x * 0 + out[mask] = self.weights[j] + return out class BumpsEmbedding(PositiveEmbedding): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - def integrate(self, a, b, j): - vol = 0. 
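# --- Stand-alone illustrative sketch (not from the stpy sources) --------------
# Two quick sanity checks on the hat (triangle) basis defined above, assuming
# only numpy/scipy; `hat` is a hypothetical stand-in for basis_fun and nothing
# below is stpy API. (i) An interior hat of half-width dm has total mass dm,
# the "contained" branch of integrate_1d; (ii) the hats on the grid sum to 1
# pointwise (a partition of unity on the interval).
import numpy as np
from scipy.integrate import quad

interval, m = (-1.0, 1.0), 9
dm = (interval[1] - interval[0]) / (m - 1)              # node spacing, as above
nodes = interval[0] + dm * np.arange(m)

def hat(x, tj):
    return np.maximum(1.0 - np.abs((x - tj) / dm), 0.0)

mass, _ = quad(lambda x: hat(x, nodes[4]), nodes[4] - dm, nodes[4] + dm)
print(mass, dm)                                          # both 0.25 here

xs = np.linspace(interval[0], interval[1], 101)
print(np.allclose(sum(hat(xs, tj) for tj in nodes), 1.0))  # partition of unity
# ------------------------------------------------------------------------------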
- return vol + def integrate(self, a, b, j): + vol = 0.0 + return vol - def integral(self, S): - """ - Integrate the Phi(x) over S - :param S: borel set - :return: - """ - assert (S.d == self.d) - psi = torch.zeros(self.get_m()).double() + def integral(self, S): + """ + Integrate the Phi(x) over S + :param S: borel set + :return: + """ + assert S.d == self.d + psi = torch.zeros(self.get_m()).double() - a, b = S.bounds[0, 0], S.bounds[0, 1] - for j in range(self.get_m()): - vol = self.integrate(a.numpy(), b.numpy(), j) - psi[j] = vol + a, b = S.bounds[0, 0], S.bounds[0, 1] + for j in range(self.get_m()): + vol = self.integrate(a.numpy(), b.numpy(), j) + psi[j] = vol - def basis_fun(self, x, j): # 1d - """ - Return the value of basis function \phi_j(x) + def basis_fun(self, x, j): # 1d + """ + Return the value of basis function \phi_j(x) - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ - dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m - tj = self.interval[0] + (j) * dm - res = -(x - tj) * (x - (tj + (2 * dm))) * (1. / (dm ** 2)) - res[res < 0] = 0 - return res + dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m + tj = self.interval[0] + (j) * dm + res = -(x - tj) * (x - (tj + (2 * dm))) * (1.0 / (dm**2)) + res[res < 0] = 0 + return res class PositiveNystromEmbeddingBump(PositiveEmbedding): - def __init__(self, *args, samples=300, **kwargs): - super().__init__(*args, **kwargs) - self.samples = np.maximum(samples, self.m) - - B = BorelSet(1, torch.Tensor([[self.interval[0], self.interval[1]]]).double()) - x = B.return_discretization(256) - y = x[:, 0].view(-1, 1) * 0 - - print("Starting optimal basis construction, with m =", self.m) - self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel, - gamma=self.kernel_object.gamma) - self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx='positive_svd', - samples=self.samples) - self.GP.fit_gp(x, y) - print("Optimal basis constructed.") - if torch.sum(torch.isnan(self.GP.embed(x))) > 0: - print("Failed basis? (zero is good):", torch.sum(torch.isnan(self.GP.embed(x)))) - - self.precomp_integral = {} - - def basis_fun(self, x, j): - return self.GP.embed(x)[:, j].view(-1, 1) - - def get_constraints(self): - s = self.m ** self.d - l = np.full(s, 0.0).astype(float) - u = np.full(s, 10e10) - Lambda = np.identity(s) - return (l, Lambda, u) - - def integral(self, S): - assert (S.d == self.d) - - if S in self.precomp_integral.keys(): - return self.precomp_integral[S] - else: - if S.d == 1: - weights, nodes = S.return_legendre_discretization(n=256) - psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0) - Gamma_half = self.cov() - psi = Gamma_half.T @ psi - self.precomp_integral[S] = psi - elif S.d == 2: - weights, nodes = S.return_legendre_discretization(n=50) - vals = self.embed_internal(nodes) - psi = torch.sum(torch.diag(weights) @ vals, dim=0) - Gamma_half = self.cov() - psi = Gamma_half.T @ psi - self.precomp_integral[S] = psi - if torch.sum(torch.isnan(psi)) > 0: - print("Failed integrals? 
(0 is good):", torch.sum(torch.isnan(psi))) - - else: - raise NotImplementedError("Higher dimension not implemented.") - return psi - - def cov(self, inverse=False): - - if self.precomp == False: - - x = torch.linspace(self.interval[0], self.interval[1], 256) - vals = self.GP.embed(x) - indices = torch.argmax(vals, dim=0) - t = x[indices] - - if self.d == 1: - t = t.view(-1, 1).double() - elif self.d == 2: - t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double() - elif self.d == 3: - t = torch.from_numpy(cartesian([t.numpy(), t.numpy(), t.numpy()])).double() - - self.Gamma = self.kernel(t, t) - Z = self.embed_internal(t) - - M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) - self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) - - # self.Gamma_half = torch.cholesky(Gamma \ - # + self.s * self.s * torch.eye(Gamma.size()[0]).double(), upper = True ) - - self.Gamma_half = torch.from_numpy( - np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + (self.s ** 2) * np.eye(self.Gamma.size()[0])))) - self.Gamma_half = self.M @ self.Gamma_half - self.invGamma_half = torch.pinverse(self.Gamma_half) - self.precomp = True - else: - pass - - if inverse == True: - return self.Gamma_half, self.invGamma_half - else: - return self.Gamma_half + def __init__(self, *args, samples=300, **kwargs): + super().__init__(*args, **kwargs) + self.samples = np.maximum(samples, self.m) + + B = BorelSet(1, torch.Tensor([[self.interval[0], self.interval[1]]]).double()) + x = B.return_discretization(256) + y = x[:, 0].view(-1, 1) * 0 + + print("Starting optimal basis construction, with m =", self.m) + self.new_kernel_object = KernelFunction( + kernel_name=self.kernel_object.optkernel, gamma=self.kernel_object.gamma + ) + self.GP = NystromFeatures( + self.new_kernel_object, + m=self.m, + approx="positive_svd", + samples=self.samples, + ) + self.GP.fit_gp(x, y) + print("Optimal basis constructed.") + if torch.sum(torch.isnan(self.GP.embed(x))) > 0: + print( + "Failed basis? (zero is good):", + torch.sum(torch.isnan(self.GP.embed(x))), + ) + + self.precomp_integral = {} + + def basis_fun(self, x, j): + return self.GP.embed(x)[:, j].view(-1, 1) + + def get_constraints(self): + s = self.m**self.d + l = np.full(s, 0.0).astype(float) + u = np.full(s, 10e10) + Lambda = np.identity(s) + return (l, Lambda, u) + + def integral(self, S): + assert S.d == self.d + + if S in self.precomp_integral.keys(): + return self.precomp_integral[S] + else: + if S.d == 1: + weights, nodes = S.return_legendre_discretization(n=256) + psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0) + Gamma_half = self.cov() + psi = Gamma_half.T @ psi + self.precomp_integral[S] = psi + elif S.d == 2: + weights, nodes = S.return_legendre_discretization(n=50) + vals = self.embed_internal(nodes) + psi = torch.sum(torch.diag(weights) @ vals, dim=0) + Gamma_half = self.cov() + psi = Gamma_half.T @ psi + self.precomp_integral[S] = psi + if torch.sum(torch.isnan(psi)) > 0: + print("Failed integrals? 
(0 is good):", torch.sum(torch.isnan(psi))) + + else: + raise NotImplementedError("Higher dimension not implemented.") + return psi + + def cov(self, inverse=False): + + if self.precomp == False: + + x = torch.linspace(self.interval[0], self.interval[1], 256) + vals = self.GP.embed(x) + indices = torch.argmax(vals, dim=0) + t = x[indices] + + if self.d == 1: + t = t.view(-1, 1).double() + elif self.d == 2: + t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double() + elif self.d == 3: + t = torch.from_numpy( + cartesian([t.numpy(), t.numpy(), t.numpy()]) + ).double() + + self.Gamma = self.kernel(t, t) + Z = self.embed_internal(t) + + M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) + self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) + + # self.Gamma_half = torch.cholesky(Gamma \ + # + self.s * self.s * torch.eye(Gamma.size()[0]).double(), upper = True ) + + self.Gamma_half = torch.from_numpy( + np.real( + scipy.linalg.sqrtm( + self.Gamma.numpy() + (self.s**2) * np.eye(self.Gamma.size()[0]) + ) + ) + ) + self.Gamma_half = self.M @ self.Gamma_half + self.invGamma_half = torch.pinverse(self.Gamma_half) + self.precomp = True + else: + pass + + if inverse == True: + return self.Gamma_half, self.invGamma_half + else: + return self.Gamma_half if __name__ == "__main__": - from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.helpers.helper import interval - import matplotlib.pyplot as plt - - d = 1 - m = 32 - n = 64 - N = 20 - sqrtbeta = 2 - s = 0.01 - b = 0 - gamma = 0.1 - k = KernelFunction(gamma=gamma) - - Emb = FaberSchauderEmbedding(d, m, offset=0.2, s=s, b=b, B=1000., kernel_object=k) - GP = GaussianProcess(d=d, s=s) - xtest = torch.from_numpy(interval(n, d)) - - x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1) - - F_true = lambda x: torch.sin(x) ** 2 - 0.1 - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - Emb.fit(x, y) - GP.fit_gp(x, y) - mu = Emb.mean_std(xtest) - mu_true, _ = GP.mean_std(xtest) - plt.plot(xtest, F_true(xtest), 'b', label='true') - plt.plot(xtest, mu_true, 'b--', label='GP') - plt.plot(x, y, 'ro') - plt.plot(xtest, mu, 'g-', label='positive basis ') - plt.legend() - plt.show() + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.helpers.helper import interval + import matplotlib.pyplot as plt + + d = 1 + m = 32 + n = 64 + N = 20 + sqrtbeta = 2 + s = 0.01 + b = 0 + gamma = 0.1 + k = KernelFunction(gamma=gamma) + + Emb = FaberSchauderEmbedding(d, m, offset=0.2, s=s, b=b, B=1000.0, kernel_object=k) + GP = GaussianProcess(d=d, s=s) + xtest = torch.from_numpy(interval(n, d)) + + x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1) + + F_true = lambda x: torch.sin(x) ** 2 - 0.1 + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) + Emb.fit(x, y) + GP.fit_gp(x, y) + mu = Emb.mean_std(xtest) + mu_true, _ = GP.mean_std(xtest) + plt.plot(xtest, F_true(xtest), "b", label="true") + plt.plot(xtest, mu_true, "b--", label="GP") + plt.plot(x, y, "ro") + plt.plot(xtest, mu, "g-", label="positive basis ") + plt.legend() + plt.show() diff --git a/stpy/embeddings/embedding.py b/stpy/embeddings/embedding.py index bdbafbb..45e0dcf 100755 --- a/stpy/embeddings/embedding.py +++ b/stpy/embeddings/embedding.py @@ -50,89 +50,105 @@ import stpy.helpers.quadrature_helper as quad_help -class Embedding(): - """ - Base class for Embeddings to approximate kernels with a higher dimensional linear product. 
- """ - - def __init__(self, gamma=0.1, nu=0.5, m=100, d=1, diameter=1.0, groups=None, kappa=1.0, - kernel="squared_exponential", cosine=False, approx="rff", **kwargs): - """ - Called to calculate the embedding weights (either via sampling or deterministically) - - Args: - gamma: (positional, 0.1) bandwidth of the squared exponential kernel - nu: (positional, 0.5) the parameter of Matern family - m: (positional, 1) - d: dimension of the - - Returns: - None - """ - self.gamma = float(gamma) - self.n = nu - self.m = int(m) - self.d = int(d) - self.nu = nu - self.kappa = kappa - self.cosine = cosine - self.diameter = diameter - self.groups = groups - self.kernel = kernel - self.approx = approx - self.gradient_avail = 0 - if self.m % 2 == 1: - raise AssertionError("Number of random features has to be even.") - - def sample(self): - """ - Called to calculate the embedding weights (either via sampling or deterministically) - - Args: - None - - Returns: - None - """ - raise AttributeError("Only derived classes can call this method.") - - def embed(self, x): - """ - Called to calculate the embedding weights (either via sampling or deterministically) - - Args: - x: numpy array containing the points to be embedded in the format (n,d) - - Returns: - y: numpy array containg the embedded points (n,m), where m is the embedding dimension - """ - - raise AttributeError("Only derived classes can call this method.") - - def get_m(self): - """ - - :return: - - """ - return self.m - - def integral(self, S): - """ - Compute the integral of the kernel over the set S - - :param S: Borel set - :return: array of length self.m of integrals of each basis function over the set S - """ - a = S.bounds[:, 0] - b = S.bounds[:, 1] - psi = torch.zeros(self.m).double() - - for i in range(self.m // 2): - omegas = self.W[i, :].view(-1) - psi[i] = quad_help.integrate_cos_multidimensional(a.numpy(), b.numpy(), omegas.numpy()) - psi[self.m // 2 + i] = quad_help.integrate_sin_multidimensional(a.numpy(), b.numpy(), omegas.numpy()) - return psi +class Embedding: + """ + Base class for Embeddings to approximate kernels with a higher dimensional linear product. 
+ """ + + def __init__( + self, + gamma=0.1, + nu=0.5, + m=100, + d=1, + diameter=1.0, + groups=None, + kappa=1.0, + kernel="squared_exponential", + cosine=False, + approx="rff", + **kwargs + ): + """ + Called to calculate the embedding weights (either via sampling or deterministically) + + Args: + gamma: (positional, 0.1) bandwidth of the squared exponential kernel + nu: (positional, 0.5) the parameter of Matern family + m: (positional, 1) + d: dimension of the + + Returns: + None + """ + self.gamma = float(gamma) + self.n = nu + self.m = int(m) + self.d = int(d) + self.nu = nu + self.kappa = kappa + self.cosine = cosine + self.diameter = diameter + self.groups = groups + self.kernel = kernel + self.approx = approx + self.gradient_avail = 0 + if self.m % 2 == 1: + raise AssertionError("Number of random features has to be even.") + + def sample(self): + """ + Called to calculate the embedding weights (either via sampling or deterministically) + + Args: + None + + Returns: + None + """ + raise AttributeError("Only derived classes can call this method.") + + def embed(self, x): + """ + Called to calculate the embedding weights (either via sampling or deterministically) + + Args: + x: numpy array containing the points to be embedded in the format (n,d) + + Returns: + y: numpy array containg the embedded points (n,m), where m is the embedding dimension + """ + + raise AttributeError("Only derived classes can call this method.") + + def get_m(self): + """ + + :return: + + """ + return self.m + + def integral(self, S): + """ + Compute the integral of the kernel over the set S + + :param S: Borel set + :return: array of length self.m of integrals of each basis function over the set S + """ + a = S.bounds[:, 0] + b = S.bounds[:, 1] + psi = torch.zeros(self.m).double() + + for i in range(self.m // 2): + omegas = self.W[i, :].view(-1) + psi[i] = quad_help.integrate_cos_multidimensional( + a.numpy(), b.numpy(), omegas.numpy() + ) + psi[self.m // 2 + i] = quad_help.integrate_sin_multidimensional( + a.numpy(), b.numpy(), omegas.numpy() + ) + return psi """ @@ -143,108 +159,124 @@ def integral(self, S): class RFFEmbedding(Embedding): - """ - Random Fourier Features emebedding - """ - - def __init__(self, biased=False, **kwargs): - super().__init__(**kwargs) - self.biased = biased - self.sample() - - def sampler(self, size): - """ - Defines the sampler object - - Args: - size: - - Return: - """ - if self.kernel == "squared_exponential": - distribution = lambda size: np.random.normal(size=size) * (1. / self.gamma) - inv_cum_dist = lambda x: norm.ppf(x) * (1. / self.gamma) - - elif self.kernel == "laplace": - distribution = None - inv_cum_dist = lambda x: (np.tan(np.pi * x - np.pi) / self.gamma) - - elif self.kernel == "modified_matern": - if self.nu == 2: - distribution = None - inv_cum_dist = None - pdf = lambda x: np.prod(2 * (self.gamma) / (np.power((1. + self.gamma ** 2 * x ** 2), 2) * np.pi), - axis=1) - elif self.nu == 3: - distribution = None - inv_cum_dist = None - pdf = lambda x: np.prod((8. * self.gamma) / (np.power((1. + self.gamma ** 2 * x ** 2), 3) * 3 * np.pi), - axis=1) - elif self.nu == 4: - distribution = None - inv_cum_dist = None - pdf = lambda x: np.prod((16. * self.gamma) / (np.power((1. 
+ self.gamma ** 2 * x ** 2), 4) * 5 * np.pi), - axis=1) - - # Random Fourier Features - if self.approx == "rff": - if distribution == None: - if inv_cum_dist == None: - self.W = helper.rejection_sampling(pdf, size=size) - else: - self.W = helper.sample_custom(inv_cum_dist, size=size) - else: - self.W = distribution(size) - - # Quasi Fourier Features - elif self.approx == "halton": - if inv_cum_dist != None: - self.W = helper.sample_qmc_halton(inv_cum_dist, size=size) - else: - raise AssertionError("Inverse Cumulative Distribution could not be deduced") - - elif self.approx == "orf": - distribution = lambda size: np.random.normal(size=size) * (1.) - self.W = distribution(size) - - # QR decomposition - self.Q, _ = np.linalg.qr(self.W) - # df and size - self.S = np.diag(chi.rvs(size[1], size=size[0])) - self.W = np.dot(self.S, self.Q) / self.gamma ** 2 - - return self.W - - def sample(self): - """ - Samples Random Fourier Features - """ - self.W = self.sampler(size=(self.m, self.d)) - self.W = torch.from_numpy(self.W) - - if self.biased == True: - self.b = 2. * np.pi * np.random.uniform(size=(self.m)) - self.bs = self.b.reshape(self.m, 1) - self.b = torch.from_numpy(self.b) - self.bs = torch.from_numpy(self.bs) - - def embed(self, x): - """ - :param x: torch array - :return: embeded vector - """ - (times, d) = x.shape - if self.biased == True: - z = np.sqrt(2. / self.m) * torch.t(torch.cos(self.W[:, 0:d].mm(torch.t(x)) + self.b.view(self.m, 1))) - else: - q = self.W[:, 0:d].mm(torch.t(x)) - # z[0:int(self.m / 2), :] = \ - z1 = np.sqrt(2. / float(self.m)) * torch.cos(q[0:int(self.m / 2), :]) - # z[int(self.m / 2):self.m, :] = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :]) - z2 = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :]) - z = torch.cat([z1, z2]) - - return torch.t(z) * np.sqrt(self.kappa) + """ + Random Fourier Features emebedding + """ + + def __init__(self, biased=False, **kwargs): + super().__init__(**kwargs) + self.biased = biased + self.sample() + + def sampler(self, size): + """ + Defines the sampler object + + Args: + size: + + Return: + """ + if self.kernel == "squared_exponential": + distribution = lambda size: np.random.normal(size=size) * (1.0 / self.gamma) + inv_cum_dist = lambda x: norm.ppf(x) * (1.0 / self.gamma) + + elif self.kernel == "laplace": + distribution = None + inv_cum_dist = lambda x: (np.tan(np.pi * x - np.pi) / self.gamma) + + elif self.kernel == "modified_matern": + if self.nu == 2: + distribution = None + inv_cum_dist = None + pdf = lambda x: np.prod( + 2 + * (self.gamma) + / (np.power((1.0 + self.gamma**2 * x**2), 2) * np.pi), + axis=1, + ) + elif self.nu == 3: + distribution = None + inv_cum_dist = None + pdf = lambda x: np.prod( + (8.0 * self.gamma) + / (np.power((1.0 + self.gamma**2 * x**2), 3) * 3 * np.pi), + axis=1, + ) + elif self.nu == 4: + distribution = None + inv_cum_dist = None + pdf = lambda x: np.prod( + (16.0 * self.gamma) + / (np.power((1.0 + self.gamma**2 * x**2), 4) * 5 * np.pi), + axis=1, + ) + + # Random Fourier Features + if self.approx == "rff": + if distribution == None: + if inv_cum_dist == None: + self.W = helper.rejection_sampling(pdf, size=size) + else: + self.W = helper.sample_custom(inv_cum_dist, size=size) + else: + self.W = distribution(size) + + # Quasi Fourier Features + elif self.approx == "halton": + if inv_cum_dist != None: + self.W = helper.sample_qmc_halton(inv_cum_dist, size=size) + else: + raise AssertionError( + "Inverse Cumulative Distribution could not be deduced" + ) + + 
elif self.approx == "orf": + distribution = lambda size: np.random.normal(size=size) * (1.0) + self.W = distribution(size) + + # QR decomposition + self.Q, _ = np.linalg.qr(self.W) + # df and size + self.S = np.diag(chi.rvs(size[1], size=size[0])) + self.W = np.dot(self.S, self.Q) / self.gamma**2 + + return self.W + + def sample(self): + """ + Samples Random Fourier Features + """ + self.W = self.sampler(size=(self.m, self.d)) + self.W = torch.from_numpy(self.W) + + if self.biased == True: + self.b = 2.0 * np.pi * np.random.uniform(size=(self.m)) + self.bs = self.b.reshape(self.m, 1) + self.b = torch.from_numpy(self.b) + self.bs = torch.from_numpy(self.bs) + + def embed(self, x): + """ + :param x: torch array + :return: embeded vector + """ + (times, d) = x.shape + if self.biased == True: + z = np.sqrt(2.0 / self.m) * torch.t( + torch.cos(self.W[:, 0:d].mm(torch.t(x)) + self.b.view(self.m, 1)) + ) + else: + q = self.W[:, 0:d].mm(torch.t(x)) + # z[0:int(self.m / 2), :] = \ + z1 = np.sqrt(2.0 / float(self.m)) * torch.cos(q[0 : int(self.m / 2), :]) + # z[int(self.m / 2):self.m, :] = np.sqrt(2. / float(self.m)) * torch.sin(q[int(self.m / 2):self.m, :]) + z2 = np.sqrt(2.0 / float(self.m)) * torch.sin( + q[int(self.m / 2) : self.m, :] + ) + z = torch.cat([z1, z2]) + + return torch.t(z) * np.sqrt(self.kappa) """ @@ -255,523 +287,611 @@ def embed(self, x): class QuadratureEmbedding(Embedding): - """ - General quadrature embedding - """ - - def __init__(self, scale=1.0, **kwargs): - Embedding.__init__(self, **kwargs) - self.scale = scale - self.compute() - - def reorder_complexity(self, omegas, weights): - abs_omegas = np.abs(omegas) - order = np.argsort(abs_omegas) - new_omegas = omegas[order] - new_weights = weights[order] - return new_omegas, new_weights - - def derivative_1(self, x): - (times, d) = tuple(x.size()) - # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) - z = torch.zeros(self.d, self.m, times, dtype=x.dtype) - q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n) - - omegas = self.W[:, 0:d] # (m,d) - - if self.cosine == False: - z[:, 0:int(self.m / 2), :] = -torch.einsum('ij,ik->jik', omegas, - torch.sqrt(self.weights.view(-1, 1)) * torch.sin( - q)) # (m,d) (m,n) - z[:, int(self.m / 2):self.m, :] = torch.einsum('ij,ik->jik', omegas, - torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q)) - else: - raise NotImplementedError("Cosine only features derivative not implemented") - - return np.sqrt(self.kappa) * z - - def derivative_2(self, x): - (times, d) = tuple(x.size()) - # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) - z = torch.zeros(self.d, self.d, self.m, times, dtype=x.dtype) - q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n) - - omegas = self.W[:, 0:d] # (m,d) - - if self.cosine == False: - z[:, :, 0:int(self.m / 2), :] = -torch.einsum('il,ij,ik->jlik', omegas, omegas, - torch.sqrt(self.weights.view(-1, 1)) * torch.cos( - q)) # (m,d) (m,d) (m,n) - z[:, :, int(self.m / 2):self.m, :] = -torch.einsum('il,ij,ik->jlik', omegas, omegas, - torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q)) - else: - raise NotImplementedError("Cosine only features derivative not implemented") - - return np.sqrt(self.kappa) * z - - def product_integral(self, S): - """ - Compute the outer product integral - :param S: Borel set - :return: m times m matrix with integrate entries - """ - assert S.d == 1 or S.d == 2 - if S.d == 1: - a = S.bounds[0, 0] - b = S.bounds[0, 1] - h = self.m // 2 - Psi = torch.zeros(size=(self.m, self.m)).double() - for i in 
range(h): - for j in range(h): - Psi[i, j] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_cos_cos(a, b, - self.W[ - i, 0], - self.W[ - j, 0]) # cos cos - Psi[i, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_sin_cos(a, b, - self.W[ - i, 0], - self.W[ - j, 0]) # cos sin - Psi[i + h, j] = torch.sqrt(self.weights[j] * self.weights[i]) * quad_help.integrate_sin_cos(a, b, - self.W[ - j, 0], - self.W[ - i, 0]) # sin cos - Psi[i + h, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) * quad_help.integrate_sin_sin(a, - b, - self.W[ - i, 0], - self.W[ - j, 0]) # sin sin - elif S.d == 2: - xa = S.bounds[0, 0] - xb = S.bounds[0, 1] - ya = S.bounds[1, 0] - yb = S.bounds[1, 1] - h = self.m // 2 - Psi = torch.zeros(size=(self.m, self.m)).double() - for i in range(h): - for j in range(h): - Psi[i, j] = torch.sqrt(self.weights[i] * self.weights[j]) \ - * quad_help.integrate2d_cos_cos(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1], - self.W[j, 0], self.W[j, 1]) # cos cos - Psi[i, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) \ - * quad_help.integrate2d_sin_cos(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1], - self.W[j, 0], self.W[j, 1]) # cos cos - Psi[i + h, j] = torch.sqrt(self.weights[j] * self.weights[i]) \ - * quad_help.integrate2d_sin_cos(xa, ya, xb, yb, self.W[j, 0], self.W[j, 1], - self.W[i, 0], self.W[i, 1]) # cos cos - Psi[i + h, j + h] = torch.sqrt(self.weights[i] * self.weights[j]) \ - * quad_help.integrate2d_sin_sin(xa, ya, xb, yb, self.W[i, 0], self.W[i, 1], - self.W[j, 0], self.W[j, 1]) # cos cos - return self.kappa * Psi - - def compute(self, complexity_reorder=True): - """ - Computes the tensor grid for Fourier features - :return: - """ - - if self.cosine == False: - self.q = int(np.power(self.m // 2, 1. / self.d)) - self.m = self.q ** self.d - else: - self.q = int(np.power(self.m, 1. / self.d)) - self.m = self.q ** self.d - - (omegas, weights) = self.nodesAndWeights(self.q) - - if complexity_reorder == True: - (omegas, weights) = self.reorder_complexity(omegas, weights) - - self.weights = helper.cartesian([weights for weight in range(self.d)]) - self.weights = np.prod(self.weights, axis=1) - - v = [omegas for omega in range(self.d)] - self.W = helper.cartesian(v) - - if self.cosine == False: - self.m = self.m * 2 - else: - pass - - self.W = torch.from_numpy(self.W) - self.weights = torch.from_numpy(self.weights) - - def transform(self): - """ - - :return: spectral density of a kernel - """ - if self.kernel == "squared_exponential": - p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power( - (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.) - - elif self.kernel == "laplace": - p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.), axis=1).reshape(-1, 1) * np.power( - self.gamma / 2., 1.) - - elif self.kernel == "modified_matern": - if self.nu == 2: - p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1, - 1) * np.power( - self.gamma * 1, 1.) - elif self.nu == 3: - p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1, - 1) * np.power( - self.gamma * 4 / 3, 1.) - elif self.nu == 4: - p = lambda omega: np.prod(1. / ((self.gamma ** 2) * (omega ** 2) + 1.) ** self.nu, axis=1).reshape(-1, - 1) * np.power( - self.gamma * 8 / 5, 1.) 
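# --- Stand-alone illustrative sketch (not from the stpy sources) --------------
# Context for the random Fourier features in RFFEmbedding above: with
# frequencies W drawn from N(0, 1/gamma^2) and features
# z(x) = sqrt(2/m) * [cos(Wx); sin(Wx)], the inner product z(x)^T z(y)
# approximates exp(-|x - y|^2 / (2 * gamma^2)). This is a minimal textbook
# version of that idea in plain numpy (shared frequencies for the cos and sin
# halves), not the stpy implementation, and none of these names are stpy API.
import numpy as np

rng = np.random.default_rng(0)
gamma, m, d = 0.5, 2000, 1
W = rng.normal(size=(m // 2, d)) / gamma                 # spectral frequencies

def rff(x):
    q = W @ x                                            # shape (m/2,)
    return np.sqrt(2.0 / m) * np.concatenate([np.cos(q), np.sin(q)])

x, y = np.array([0.1]), np.array([0.4])
approx = rff(x) @ rff(y)
exact = np.exp(-np.sum((x - y) ** 2) / (2 * gamma ** 2))
print(approx, exact)    # agree up to Monte-Carlo error, a few percent for this m
# ------------------------------------------------------------------------------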
- - return p - - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D - - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - - # For osciallatory integrands even this has good properties. - # weights = np.ones(self.q) * self.scale * np.pi / (self.q + 1) - # omegas = (np.linspace(0, self.q - 1, self.q)) + 1 - # omegas = omegas * (np.pi / (self.q + 1)) - - (omegas, weights) = np.polynomial.legendre.leggauss(2 * q) - - omegas = omegas[q:] - weights = 2 * weights[q:] - - omegas = ((omegas + 1.) / 2.) * np.pi - sine_scale = (1. / (np.sin(omegas) ** 2)) - omegas = self.scale / np.tan(omegas) - prob = self.transform() - weights = self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten() - return (omegas, weights) - - def embed(self, x): - """ - :param x: torch array - :return: embeding of the x - """ - (times, d) = tuple(x.size()) - # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) - z = torch.zeros(self.m, times, dtype=x.dtype) - q = torch.mm(self.W[:, 0:d], torch.t(x)) - - if self.cosine == False: - z[0:int(self.m / 2), :] = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q) - z[int(self.m / 2):self.m, :] = torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q) - else: - z = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q) - - return torch.t(z) * np.sqrt(self.kappa) - - def get_sub_indices(self, group): - """ - :param group: group part of the embeding to embed - :return: embeding of x in group - """ - m2 = self.m - mhalf = int(np.power(self.m // 2, 1. / self.d)) - - m = 2 * mhalf - mquater = mhalf // 2 - - if group == 0: - ind = np.arange(mquater * mhalf, (mquater + 1) * mhalf, 1).tolist() + np.arange(m2 // 2 + (mquater * mhalf), - m2 // 2 + ( - mquater + 1) * mhalf, - 1).tolist() - return ind - else: - ind = np.arange(mquater, m2 // 2, mhalf).tolist() + np.arange(m2 // 2 + mquater, m2, mhalf).tolist() - return ind - - def get_sum_sub_indices(self, group): - - # idenitfy unique values - arr = self.W[:, group] - values = np.unique(arr) - # find indices of each unique value - ind = [] - for value in values: - ind_inside = [] - for index, elem in enumerate(arr): - if elem == value: - ind_inside.append(index) - ind.append(ind_inside) - ind_inside2 = [i + self.m // 2 for i in ind_inside] - ind.append(ind_inside2) - return ind + """ + General quadrature embedding + """ + + def __init__(self, scale=1.0, **kwargs): + Embedding.__init__(self, **kwargs) + self.scale = scale + self.compute() + + def reorder_complexity(self, omegas, weights): + abs_omegas = np.abs(omegas) + order = np.argsort(abs_omegas) + new_omegas = omegas[order] + new_weights = weights[order] + return new_omegas, new_weights + + def derivative_1(self, x): + (times, d) = tuple(x.size()) + # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) + z = torch.zeros(self.d, self.m, times, dtype=x.dtype) + q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n) + + omegas = self.W[:, 0:d] # (m,d) + + if self.cosine == False: + z[:, 0 : int(self.m / 2), :] = -torch.einsum( + "ij,ik->jik", + omegas, + torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q), + ) # (m,d) (m,n) + z[:, int(self.m / 2) : self.m, :] = torch.einsum( + "ij,ik->jik", + omegas, + torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q), + ) + else: + raise NotImplementedError("Cosine only features derivative not implemented") + + return np.sqrt(self.kappa) * z + + def derivative_2(self, x): + (times, d) = tuple(x.size()) + # z = 
torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) + z = torch.zeros(self.d, self.d, self.m, times, dtype=x.dtype) + q = torch.mm(self.W[:, 0:d], torch.t(x)) # (m,d)x(d,n) + + omegas = self.W[:, 0:d] # (m,d) + + if self.cosine == False: + z[:, :, 0 : int(self.m / 2), :] = -torch.einsum( + "il,ij,ik->jlik", + omegas, + omegas, + torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q), + ) # (m,d) (m,d) (m,n) + z[:, :, int(self.m / 2) : self.m, :] = -torch.einsum( + "il,ij,ik->jlik", + omegas, + omegas, + torch.sqrt(self.weights.view(-1, 1)) * torch.sin(q), + ) + else: + raise NotImplementedError("Cosine only features derivative not implemented") + + return np.sqrt(self.kappa) * z + + def product_integral(self, S): + """ + Compute the outer product integral + :param S: Borel set + :return: m times m matrix with integrate entries + """ + assert S.d == 1 or S.d == 2 + if S.d == 1: + a = S.bounds[0, 0] + b = S.bounds[0, 1] + h = self.m // 2 + Psi = torch.zeros(size=(self.m, self.m)).double() + for i in range(h): + for j in range(h): + Psi[i, j] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate_cos_cos( + a, b, self.W[i, 0], self.W[j, 0] + ) # cos cos + Psi[i, j + h] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate_sin_cos( + a, b, self.W[i, 0], self.W[j, 0] + ) # cos sin + Psi[i + h, j] = torch.sqrt( + self.weights[j] * self.weights[i] + ) * quad_help.integrate_sin_cos( + a, b, self.W[j, 0], self.W[i, 0] + ) # sin cos + Psi[i + h, j + h] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate_sin_sin( + a, b, self.W[i, 0], self.W[j, 0] + ) # sin sin + elif S.d == 2: + xa = S.bounds[0, 0] + xb = S.bounds[0, 1] + ya = S.bounds[1, 0] + yb = S.bounds[1, 1] + h = self.m // 2 + Psi = torch.zeros(size=(self.m, self.m)).double() + for i in range(h): + for j in range(h): + Psi[i, j] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate2d_cos_cos( + xa, + ya, + xb, + yb, + self.W[i, 0], + self.W[i, 1], + self.W[j, 0], + self.W[j, 1], + ) # cos cos + Psi[i, j + h] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate2d_sin_cos( + xa, + ya, + xb, + yb, + self.W[i, 0], + self.W[i, 1], + self.W[j, 0], + self.W[j, 1], + ) # cos cos + Psi[i + h, j] = torch.sqrt( + self.weights[j] * self.weights[i] + ) * quad_help.integrate2d_sin_cos( + xa, + ya, + xb, + yb, + self.W[j, 0], + self.W[j, 1], + self.W[i, 0], + self.W[i, 1], + ) # cos cos + Psi[i + h, j + h] = torch.sqrt( + self.weights[i] * self.weights[j] + ) * quad_help.integrate2d_sin_sin( + xa, + ya, + xb, + yb, + self.W[i, 0], + self.W[i, 1], + self.W[j, 0], + self.W[j, 1], + ) # cos cos + return self.kappa * Psi + + def compute(self, complexity_reorder=True): + """ + Computes the tensor grid for Fourier features + :return: + """ + + if self.cosine == False: + self.q = int(np.power(self.m // 2, 1.0 / self.d)) + self.m = self.q**self.d + else: + self.q = int(np.power(self.m, 1.0 / self.d)) + self.m = self.q**self.d + + (omegas, weights) = self.nodesAndWeights(self.q) + + if complexity_reorder == True: + (omegas, weights) = self.reorder_complexity(omegas, weights) + + self.weights = helper.cartesian([weights for weight in range(self.d)]) + self.weights = np.prod(self.weights, axis=1) + + v = [omegas for omega in range(self.d)] + self.W = helper.cartesian(v) + + if self.cosine == False: + self.m = self.m * 2 + else: + pass + + self.W = torch.from_numpy(self.W) + self.weights = torch.from_numpy(self.weights) + + def transform(self): + """ 
+ + :return: spectral density of a kernel + """ + if self.kernel == "squared_exponential": + p = ( + lambda omega: np.exp( + -np.sum(omega**2, axis=1).reshape(-1, 1) / 2 * (self.gamma**2) + ) + * np.power((self.gamma / np.sqrt(2 * np.pi)), 1.0) + * np.power(np.pi / 2, 1.0) + ) + + elif self.kernel == "laplace": + p = lambda omega: np.prod( + 1.0 / ((self.gamma**2) * (omega**2) + 1.0), axis=1 + ).reshape(-1, 1) * np.power(self.gamma / 2.0, 1.0) + + elif self.kernel == "modified_matern": + if self.nu == 2: + p = lambda omega: np.prod( + 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1 + ).reshape(-1, 1) * np.power(self.gamma * 1, 1.0) + elif self.nu == 3: + p = lambda omega: np.prod( + 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1 + ).reshape(-1, 1) * np.power(self.gamma * 4 / 3, 1.0) + elif self.nu == 4: + p = lambda omega: np.prod( + 1.0 / ((self.gamma**2) * (omega**2) + 1.0) ** self.nu, axis=1 + ).reshape(-1, 1) * np.power(self.gamma * 8 / 5, 1.0) + + return p + + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D + + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + + # For osciallatory integrands even this has good properties. + # weights = np.ones(self.q) * self.scale * np.pi / (self.q + 1) + # omegas = (np.linspace(0, self.q - 1, self.q)) + 1 + # omegas = omegas * (np.pi / (self.q + 1)) + + (omegas, weights) = np.polynomial.legendre.leggauss(2 * q) + + omegas = omegas[q:] + weights = 2 * weights[q:] + + omegas = ((omegas + 1.0) / 2.0) * np.pi + sine_scale = 1.0 / (np.sin(omegas) ** 2) + omegas = self.scale / np.tan(omegas) + prob = self.transform() + weights = ( + self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten() + ) + return (omegas, weights) + + def embed(self, x): + """ + :param x: torch array + :return: embeding of the x + """ + (times, d) = tuple(x.size()) + # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) + z = torch.zeros(self.m, times, dtype=x.dtype) + q = torch.mm(self.W[:, 0:d], torch.t(x)) + + if self.cosine == False: + z[0 : int(self.m / 2), :] = torch.sqrt( + self.weights.view(-1, 1) + ) * torch.cos(q) + z[int(self.m / 2) : self.m, :] = torch.sqrt( + self.weights.view(-1, 1) + ) * torch.sin(q) + else: + z = torch.sqrt(self.weights.view(-1, 1)) * torch.cos(q) + + return torch.t(z) * np.sqrt(self.kappa) + + def get_sub_indices(self, group): + """ + :param group: group part of the embeding to embed + :return: embeding of x in group + """ + m2 = self.m + mhalf = int(np.power(self.m // 2, 1.0 / self.d)) + + m = 2 * mhalf + mquater = mhalf // 2 + + if group == 0: + ind = ( + np.arange(mquater * mhalf, (mquater + 1) * mhalf, 1).tolist() + + np.arange( + m2 // 2 + (mquater * mhalf), m2 // 2 + (mquater + 1) * mhalf, 1 + ).tolist() + ) + return ind + else: + ind = ( + np.arange(mquater, m2 // 2, mhalf).tolist() + + np.arange(m2 // 2 + mquater, m2, mhalf).tolist() + ) + return ind + + def get_sum_sub_indices(self, group): + + # idenitfy unique values + arr = self.W[:, group] + values = np.unique(arr) + # find indices of each unique value + ind = [] + for value in values: + ind_inside = [] + for index, elem in enumerate(arr): + if elem == value: + ind_inside.append(index) + ind.append(ind_inside) + ind_inside2 = [i + self.m // 2 for i in ind_inside] + ind.append(ind_inside2) + return ind class TrapezoidalEmbedding(QuadratureEmbedding): - def __init__(self, **kwargs): - QuadratureEmbedding.__init__(self, **kwargs) - if self.kernel != 
"squared_exponential": - raise AssertionError("This embeding is allowed only with Squared Exponential Kernel") + def __init__(self, **kwargs): + QuadratureEmbedding.__init__(self, **kwargs) + if self.kernel != "squared_exponential": + raise AssertionError( + "This embeding is allowed only with Squared Exponential Kernel" + ) - def nodesAndWeights(self, q): - prob = self.transform() - # prob = lambda x: - h = np.sqrt(np.pi / q) / self.gamma ** 2 + def nodesAndWeights(self, q): + prob = self.transform() + # prob = lambda x: + h = np.sqrt(np.pi / q) / self.gamma**2 - nodes = np.linspace(-q // 2, q // 2, q) * h - # print (nodes) + nodes = np.linspace(-q // 2, q // 2, q) * h + # print (nodes) - weights = h * prob(nodes.reshape(-1, 1)).flatten() * (2 / np.pi) + weights = h * prob(nodes.reshape(-1, 1)).flatten() * (2 / np.pi) - # nodes = np.sqrt(2) * nodes / self.gamma + # nodes = np.sqrt(2) * nodes / self.gamma - return (nodes, weights) + return (nodes, weights) class ClenshawCurtisEmbedding(QuadratureEmbedding): - def __init__(self, **kwargs): - QuadratureEmbedding.__init__(self, **kwargs) - if self.kernel != "squared_exponential": - raise AssertionError("This embeding is allowed only with Squared Exponential Kernel") + def __init__(self, **kwargs): + QuadratureEmbedding.__init__(self, **kwargs) + if self.kernel != "squared_exponential": + raise AssertionError( + "This embeding is allowed only with Squared Exponential Kernel" + ) - def nodesAndWeights(self, q): - L = 1. / self.gamma - prob = self.transform() - # prob = lambda x: + def nodesAndWeights(self, q): + L = 1.0 / self.gamma + prob = self.transform() + # prob = lambda x: - nodes_0 = np.linspace(0, q + 1, q + 2) - nodes_0 = np.pi * nodes_0[1:-1] / (q + 2) - nodes = L / np.tan(nodes_0) + nodes_0 = np.linspace(0, q + 1, q + 2) + nodes_0 = np.pi * nodes_0[1:-1] / (q + 2) + nodes = L / np.tan(nodes_0) - weights = L * (np.pi / (q + 2)) * (1. / np.sin(nodes_0) ** 2) - weights = weights * prob(nodes.reshape(-1, 1)).flatten() * (2. / np.pi) + weights = L * (np.pi / (q + 2)) * (1.0 / np.sin(nodes_0) ** 2) + weights = weights * prob(nodes.reshape(-1, 1)).flatten() * (2.0 / np.pi) - return (nodes, weights) + return (nodes, weights) - def nodesAndWeights2(self, q): - prob = self.transform() + def nodesAndWeights2(self, q): + prob = self.transform() - nodes_0 = np.linspace(0, q + 1, q + 2) - nodes_0 = nodes_0[1:-1] / (q + 2) * np.pi + nodes_0 = np.linspace(0, q + 1, q + 2) + nodes_0 = nodes_0[1:-1] / (q + 2) * np.pi - nodes = np.sqrt(-np.log(np.sin(nodes_0[0:q // 2]))) - nodes2 = -np.sqrt(-np.log(np.sin(nodes_0[q // 2:]))) + nodes = np.sqrt(-np.log(np.sin(nodes_0[0 : q // 2]))) + nodes2 = -np.sqrt(-np.log(np.sin(nodes_0[q // 2 :]))) - n1 = nodes_0[0:q // 2] - n2 = nodes_0[q // 2:] + n1 = nodes_0[0 : q // 2] + n2 = nodes_0[q // 2 :] - weights = (1. / np.tan(n1)) * (1. / np.sqrt(-np.log(np.sin(n1)))) * prob( - nodes.reshape(-1, 1)).flatten() * np.pi / (q + 2) - weights2 = -(1. / np.tan(n2)) * (1. 
/ np.sqrt(-np.log(np.sin(n2)))) * prob( - nodes.reshape(-1, 1)).flatten() * np.pi / (q + 2) + weights = ( + (1.0 / np.tan(n1)) + * (1.0 / np.sqrt(-np.log(np.sin(n1)))) + * prob(nodes.reshape(-1, 1)).flatten() + * np.pi + / (q + 2) + ) + weights2 = ( + -(1.0 / np.tan(n2)) + * (1.0 / np.sqrt(-np.log(np.sin(n2)))) + * prob(nodes.reshape(-1, 1)).flatten() + * np.pi + / (q + 2) + ) - nodes = np.concatenate((nodes, nodes2)) - weights = np.concatenate((weights, weights2)) + nodes = np.concatenate((nodes, nodes2)) + weights = np.concatenate((weights, weights2)) - return (nodes, weights) + return (nodes, weights) class HermiteEmbedding(QuadratureEmbedding): - """ - Hermite Quadrature Fourier Features for squared exponential kernel - """ - - def __init__(self, ones=False, cosine=False, **kwargs): - self.ones = ones - self.cosine = cosine - QuadratureEmbedding.__init__(self, **kwargs) - if self.kernel != "squared_exponential": - raise AssertionError("Hermite Embedding is allowed only with Squared Exponential Kernel") - - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D - - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - (nodes, weights) = np.polynomial.hermite.hermgauss(2 * q) - # print (nodes) - nodes = nodes[q:] - weights = 2 * weights[q:] - - if self.ones == True: - weights = np.ones(q) - - nodes = np.sqrt(2) * nodes / self.gamma - weights = weights / np.sqrt(np.pi) - return (nodes, weights) + """ + Hermite Quadrature Fourier Features for squared exponential kernel + """ + + def __init__(self, ones=False, cosine=False, **kwargs): + self.ones = ones + self.cosine = cosine + QuadratureEmbedding.__init__(self, **kwargs) + if self.kernel != "squared_exponential": + raise AssertionError( + "Hermite Embedding is allowed only with Squared Exponential Kernel" + ) + + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D + + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + (nodes, weights) = np.polynomial.hermite.hermgauss(2 * q) + # print (nodes) + nodes = nodes[q:] + weights = 2 * weights[q:] + + if self.ones == True: + weights = np.ones(q) + + nodes = np.sqrt(2) * nodes / self.gamma + weights = weights / np.sqrt(np.pi) + return (nodes, weights) class OverCompleteHermiteEmbedding(HermiteEmbedding): - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - (nodes, weights) = np.polynomial.hermite.hermgauss(q) - nodes = nodes - weights = weights + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + (nodes, weights) = np.polynomial.hermite.hermgauss(q) + nodes = nodes + weights = weights - nodes = np.sqrt(2) * nodes / self.gamma - weights = weights / np.sqrt(np.pi) - return (nodes, weights) + nodes = np.sqrt(2) * nodes / self.gamma + weights = weights / np.sqrt(np.pi) + return (nodes, weights) class MaternEmbedding(QuadratureEmbedding): - """ - Matern specific quadrature based Fourier Features - """ + """ + Matern specific quadrature based Fourier Features + """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - if self.kernel != "modified_matern" and self.kernel != "laplace": - raise AssertionError("Matern Embedding is allowed only with Matern Kernel") + def __init__(self, **kwargs): + super().__init__(**kwargs) + if 
self.kernel != "modified_matern" and self.kernel != "laplace": + raise AssertionError("Matern Embedding is allowed only with Matern Kernel") - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - (nodes, weights) = np.polynomial.hermite.hermgauss(q) - nodes = np.sqrt(2) * nodes / self.gamma - weights = weights / np.sqrt(np.pi) - return (nodes, weights) + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + (nodes, weights) = np.polynomial.hermite.hermgauss(q) + nodes = np.sqrt(2) * nodes / self.gamma + weights = weights / np.sqrt(np.pi) + return (nodes, weights) class QuadPeriodicEmbedding(QuadratureEmbedding): - """ - General class implementing - """ + """ + General class implementing + """ - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - weights = np.ones(self.q) * self.scale * 2 / (self.q + 1) - omegas = (np.linspace(0, self.q - 1, self.q)) + 1 - omegas = omegas * (np.pi / (self.q + 1)) + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + weights = np.ones(self.q) * self.scale * 2 / (self.q + 1) + omegas = (np.linspace(0, self.q - 1, self.q)) + 1 + omegas = omegas * (np.pi / (self.q + 1)) - sine_scale = (1. / (np.sin(omegas) ** 2)) - omegas = self.scale / np.tan(omegas) - prob = self.transform() - weights = self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten() - return (omegas, weights) + sine_scale = 1.0 / (np.sin(omegas) ** 2) + omegas = self.scale / np.tan(omegas) + prob = self.transform() + weights = ( + self.scale * sine_scale * weights * prob(omegas.reshape(-1, 1)).flatten() + ) + return (omegas, weights) class KLEmbedding(QuadratureEmbedding): - """ - General class implementing Karhunen-Loeve expansion - """ + """ + General class implementing Karhunen-Loeve expansion + """ - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) class LatticeEmbedding(QuadratureEmbedding): - """ - Class for standard basis indexed by natural numbers - """ + """ + Class for standard basis indexed by natural numbers + """ - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(**kwargs) - # if self.kernel != "modified_matern" and self.kernel !="laplace": - # raise AssertionError("Matern Embedding is allowed only with Matern Kernel") + # if self.kernel != "modified_matern" and self.kernel !="laplace": + # raise AssertionError("Matern Embedding is allowed only with Matern Kernel") - def nodesAndWeights(self, q): - """ - Compute nodes and weights of the quadrature scheme in 1D + def nodesAndWeights(self, q): + """ + Compute nodes and weights of the quadrature scheme in 1D - :param q: degree of quadrature - :return: tuple of (nodes, weights) - """ - nodes = np.arange(1, q + 1, 1) - nodes = np.sqrt(2) * nodes / self.gamma - weights = np.ones(q) / (2 * q) - return (nodes, weights) + :param q: degree of quadrature + :return: tuple of (nodes, weights) + """ + nodes = 
np.arange(1, q + 1, 1) + nodes = np.sqrt(2) * nodes / self.gamma + weights = np.ones(q) / (2 * q) + return (nodes, weights) class ConcatEmbedding(Embedding): - def __init__(self, embeddings: List[Embedding]): + def __init__(self, embeddings: List[Embedding]): - self.embeddings = embeddings - self.m = sum([emb.get_m() for emb in embeddings]) + self.embeddings = embeddings + self.m = sum([emb.get_m() for emb in embeddings]) - def embed(self, xtest): - return torch.hstack([emb.embed(xtest) for emb in self.embeddings]) + def embed(self, xtest): + return torch.hstack([emb.embed(xtest) for emb in self.embeddings]) class MaskedEmbedding(Embedding): - def __init__(self, embedding: Embedding, mask: Callable): - self.embedding = embedding - self.m = self.embedding.get_m() - self.mask = mask - - def embed(self, xtest): - return torch.diag(self.mask(xtest))@self.embedding.embed(xtest) - - -class AdditiveEmbeddings(): - - def __init__(self, embeddings, ms, groups=None, scaling=None, additive=True): - self.emebeddings = embeddings - if scaling is None: - self.scaling = torch.ones(len(self.emebeddings)).double() # /np.sqrt(len(self.emebeddings)) - else: - self.scaling = scaling - self.additive = additive - - if groups is not None: - self.groups = groups - else: - self.groups = [[i] for i in range(len(self.emebeddings))] - - try: - self.ms = torch.Tensor(ms) - except: - self.ms = ms - - self.no_emb = len(self.emebeddings) - self.m = torch.sum(self.ms) - - def embed(self, x): - if self.additive: - r = torch.zeros(size=(x.size()[0], int(torch.sum(self.ms)))).double() - count = 0 - for index, embedding in enumerate(self.emebeddings): - r[:, count:count + int(self.ms[index])] = \ - embedding.embed(x[:, self.groups[index]].view(-1, len(self.groups[index]))) * self.scaling[index] - count = count + int(self.ms[index]) - return r - else: - pass - - -class ProjectiveEmbeddings(): - - def __init__(self, embedding, project): - self.embedding = embedding - self.project = project - - def embed(self, x): - r = self.embedding.embed(self.project(x)) - return r + def __init__(self, embedding: Embedding, mask: Callable): + self.embedding = embedding + self.m = self.embedding.get_m() + self.mask = mask + + def embed(self, xtest): + return torch.diag(self.mask(xtest)) @ self.embedding.embed(xtest) + + +class AdditiveEmbeddings: + + def __init__(self, embeddings, ms, groups=None, scaling=None, additive=True): + self.emebeddings = embeddings + if scaling is None: + self.scaling = torch.ones( + len(self.emebeddings) + ).double() # /np.sqrt(len(self.emebeddings)) + else: + self.scaling = scaling + self.additive = additive + + if groups is not None: + self.groups = groups + else: + self.groups = [[i] for i in range(len(self.emebeddings))] + + try: + self.ms = torch.Tensor(ms) + except: + self.ms = ms + + self.no_emb = len(self.emebeddings) + self.m = torch.sum(self.ms) + + def embed(self, x): + if self.additive: + r = torch.zeros(size=(x.size()[0], int(torch.sum(self.ms)))).double() + count = 0 + for index, embedding in enumerate(self.emebeddings): + r[:, count : count + int(self.ms[index])] = ( + embedding.embed( + x[:, self.groups[index]].view(-1, len(self.groups[index])) + ) + * self.scaling[index] + ) + count = count + int(self.ms[index]) + return r + else: + pass + + +class ProjectiveEmbeddings: + + def __init__(self, embedding, project): + self.embedding = embedding + self.project = project + + def embed(self, x): + r = self.embedding.embed(self.project(x)) + return r diff --git a/stpy/embeddings/onehot_embedding.py 
b/stpy/embeddings/onehot_embedding.py index cdfdda7..944a532 100644 --- a/stpy/embeddings/onehot_embedding.py +++ b/stpy/embeddings/onehot_embedding.py @@ -6,30 +6,34 @@ class OnehotEmbedding(Embedding): - def __init__(self, p, d): - self.p = p # max value - self.d = d # sites - self.m = p*d + def __init__(self, p, d): + self.p = p # max value + self.d = d # sites + self.m = p * d - def get_m(self): - return self.p*self.d + def get_m(self): + return self.p * self.d + def apply(self, x, f): + return torch.stack( + [f(x_i) for i, x_i in enumerate(torch.unbind(x, dim=0), 0)], dim=0 + ) - def apply(self,x,f): - return torch.stack([f(x_i) for i, x_i in enumerate(torch.unbind(x, dim=0), 0)], dim=0) + def embed(self, x): + n, d = x.size() + out = torch.zeros(n, self.p * self.d).double() - def embed(self, x): - n,d = x.size() - out = torch.zeros(n,self.p*self.d).double() + f = lambda x: torch.from_numpy( + np.array([x[i] + 20 * i for i in range(self.d)]) + ).int() + indices = self.apply(x, f).long() + for i in range(n): + out[i, indices[i]] = 1.0 - f = lambda x: torch.from_numpy(np.array([x[i]+20*i for i in range(self.d)])).int() - indices = self.apply(x,f).long() - for i in range(n): - out[i,indices[i]] = 1. + return out - return out if __name__ == "__main__": - emb = OnehotEmbedding(20,2) - x = torch.Tensor([[2,3],[4,5],[10,19]]) - print (emb.embed(x)) \ No newline at end of file + emb = OnehotEmbedding(20, 2) + x = torch.Tensor([[2, 3], [4, 5], [10, 19]]) + print(emb.embed(x)) diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index 170d018..9ccc3c6 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -12,176 +12,203 @@ class OptimalPositiveBasis(PositiveEmbedding): - def __init__(self, *args, samples=300, discretization_size=30, saved=False, **kwargs): - super().__init__(*args, **kwargs) - self.samples = np.maximum(samples, self.m) - - B = BorelSet(self.d, torch.Tensor([[self.interval[0], self.interval[1]] for _ in range(self.d)]).double()) - self.discretized_domain = B.return_discretization(discretization_size) - - y = self.discretized_domain[:, 0].view(-1, 1) * 0 - - print("Optimal basis with arbitrary dimension, namely d =", self.d) - print("Starting optimal basis construction, with m =", self.m) - # self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel, - # gamma = self.kernel_object.gamma, d = self.kernel_object.d) - - self.new_kernel_object = self.kernel_object - if saved == True: - print("Did not load GP object, it needs to loaded") - else: - self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx='positive_svd', - samples=self.samples) - self.GP.fit_gp(self.discretized_domain, y) - print("Optimal basis constructed.") - if torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))) > 0: - print("Failed basis? 
(zero is good):", torch.sum(torch.isnan(self.GP.embed(self.discretized_domain)))) - self.precomp_integral = {} - - def get_m(self): - return self.m - - def basis_fun(self, x, j): - return self.GP.embed(x)[:, j].view(-1, 1) - - def embed_internal(self, x): - out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) - for j in range(self.m): - out[:, j] = self.basis_fun(x, j).view(-1) - return out - - def save_embedding(self, filename): - filehandler = open(filename, 'w') - pickle.dump(self.GP, filehandler) - - def load_embedding(self, filename): - file_pi2 = open(filename, 'r') - self.GP = pickle.load(file_pi2) - - def get_constraints(self): - s = self.get_m() - l = np.full(s, 0.0).astype(float) - u = np.full(s, 10e10) - Lambda = np.identity(s) - return (l, Lambda, u) - - def integral(self, S): - assert (S.d == self.d) - - if S in self.precomp_integral.keys(): - return self.precomp_integral[S] - else: - if S.d == 1: - weights, nodes = S.return_legendre_discretization(n=256) - psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0) - Gamma_half = self.cov() - psi = Gamma_half.T @ psi - self.precomp_integral[S] = psi - elif S.d == 2: - weights, nodes = S.return_legendre_discretization(n=50) - vals = self.embed_internal(nodes) - psi = torch.sum(torch.diag(weights) @ vals, dim=0) - Gamma_half = self.cov() - psi = Gamma_half.T @ psi - self.precomp_integral[S] = psi - if torch.sum(torch.isnan(psi)) > 0: - print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi))) - - else: - raise NotImplementedError("Higher dimension not implemented.") - return psi - - def cov(self, inverse=False): - - if self.precomp == False: - - x = self.discretized_domain - vals = self.GP.embed(x) - indices = torch.argmax(vals, dim=0) # the nodes are the maxima of the bump functions - t = x[indices] - print("nodes of functions", t.size()) - - self.Gamma = self.kernel(t, t) - Z = self.embed_internal(t) - - M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) - self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) - - self.Gamma_half = torch.from_numpy( - np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + (self.s ** 2) * np.eye(self.Gamma.size()[0])))) - self.Gamma_half = self.M @ self.Gamma_half - self.invGamma_half = torch.pinverse(self.Gamma_half) - self.precomp = True - else: - pass - - if inverse == True: - return self.Gamma_half, self.invGamma_half - else: - return self.Gamma_half + def __init__( + self, *args, samples=300, discretization_size=30, saved=False, **kwargs + ): + super().__init__(*args, **kwargs) + self.samples = np.maximum(samples, self.m) + + B = BorelSet( + self.d, + torch.Tensor( + [[self.interval[0], self.interval[1]] for _ in range(self.d)] + ).double(), + ) + self.discretized_domain = B.return_discretization(discretization_size) + + y = self.discretized_domain[:, 0].view(-1, 1) * 0 + + print("Optimal basis with arbitrary dimension, namely d =", self.d) + print("Starting optimal basis construction, with m =", self.m) + # self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel, + # gamma = self.kernel_object.gamma, d = self.kernel_object.d) + + self.new_kernel_object = self.kernel_object + if saved == True: + print("Did not load GP object, it needs to loaded") + else: + self.GP = NystromFeatures( + self.new_kernel_object, + m=self.m, + approx="positive_svd", + samples=self.samples, + ) + self.GP.fit_gp(self.discretized_domain, y) + print("Optimal basis constructed.") + if 
torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))) > 0: + print( + "Failed basis? (zero is good):", + torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))), + ) + self.precomp_integral = {} + + def get_m(self): + return self.m + + def basis_fun(self, x, j): + return self.GP.embed(x)[:, j].view(-1, 1) + + def embed_internal(self, x): + out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) + for j in range(self.m): + out[:, j] = self.basis_fun(x, j).view(-1) + return out + + def save_embedding(self, filename): + filehandler = open(filename, "w") + pickle.dump(self.GP, filehandler) + + def load_embedding(self, filename): + file_pi2 = open(filename, "r") + self.GP = pickle.load(file_pi2) + + def get_constraints(self): + s = self.get_m() + l = np.full(s, 0.0).astype(float) + u = np.full(s, 10e10) + Lambda = np.identity(s) + return (l, Lambda, u) + + def integral(self, S): + assert S.d == self.d + + if S in self.precomp_integral.keys(): + return self.precomp_integral[S] + else: + if S.d == 1: + weights, nodes = S.return_legendre_discretization(n=256) + psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0) + Gamma_half = self.cov() + psi = Gamma_half.T @ psi + self.precomp_integral[S] = psi + elif S.d == 2: + weights, nodes = S.return_legendre_discretization(n=50) + vals = self.embed_internal(nodes) + psi = torch.sum(torch.diag(weights) @ vals, dim=0) + Gamma_half = self.cov() + psi = Gamma_half.T @ psi + self.precomp_integral[S] = psi + if torch.sum(torch.isnan(psi)) > 0: + print("Failed integrals? (0 is good):", torch.sum(torch.isnan(psi))) + + else: + raise NotImplementedError("Higher dimension not implemented.") + return psi + + def cov(self, inverse=False): + + if self.precomp == False: + + x = self.discretized_domain + vals = self.GP.embed(x) + indices = torch.argmax( + vals, dim=0 + ) # the nodes are the maxima of the bump functions + t = x[indices] + print("nodes of functions", t.size()) + + self.Gamma = self.kernel(t, t) + Z = self.embed_internal(t) + + M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) + self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) + + self.Gamma_half = torch.from_numpy( + np.real( + scipy.linalg.sqrtm( + self.Gamma.numpy() + (self.s**2) * np.eye(self.Gamma.size()[0]) + ) + ) + ) + self.Gamma_half = self.M @ self.Gamma_half + self.invGamma_half = torch.pinverse(self.Gamma_half) + self.precomp = True + else: + pass + + if inverse == True: + return self.Gamma_half, self.invGamma_half + else: + return self.Gamma_half if __name__ == "__main__": - from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.helpers.helper import interval - import matplotlib.pyplot as plt - from scipy.interpolate import griddata - - d = 2 - m = 64 - n = 64 - N = 20 - sqrtbeta = 2 - s = 0.01 - b = 0 - gamma = 0.5 - k = KernelFunction(gamma=gamma, d=2) - - Emb = OptimalPositiveBasis(d, m, offset=0.2, s=s, b=b, discretization_size=n, B=1000., kernel_object=k) - - GP = GaussianProcess(d=d, s=s) - xtest = torch.from_numpy(interval(n, d)) - - x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, d))) - - F_true = lambda x: torch.sum(torch.sin(x) ** 2 - 0.1, dim=1).view(-1, 1) - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - - # Try to plot the basis functions - msqrt = int(np.sqrt(m)) - fig, axs = plt.subplots(msqrt, msqrt, figsize=(15, 7)) - for i in range(m): - f_i = Emb.basis_fun(xtest, i) ## basis function - xx = xtest[:, 0].numpy() - yy = 
xtest[:, 1].numpy() - ax = axs[int(i // msqrt), (i % msqrt)] - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_f = griddata((xx, yy), f_i[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=10) - ax.contour(cs, colors='k') - # cbar = fig.colorbar(cs) - # if self.x is not None: - # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") - ax.grid(c='k', ls='-', alpha=0.1) - - plt.savefig("positive.png") - plt.show() - - Emb.fit(x, y) - GP.fit_gp(x, y) - - mu, _ = Emb.mean_std(xtest) - mu_true, _ = GP.mean_std(xtest) - - Emb.visualize_function(xtest, [F_true, lambda x: GP.mean_std(x)[0], lambda x: Emb.mean_std(x)[0]]) - # Emb.visualize_function(xtest,GP.mean_std) - # Emb.visualize_function(xtest,Emb.mean_std) - - # plt.plot(xtest,mu_true,'b--', label = 'GP') - - # plt.plot(x,y,'ro') - # plt.plot(xtest, mu, 'g-', label = 'positive basis ') - # plt.legend() - plt.show() + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.helpers.helper import interval + import matplotlib.pyplot as plt + from scipy.interpolate import griddata + + d = 2 + m = 64 + n = 64 + N = 20 + sqrtbeta = 2 + s = 0.01 + b = 0 + gamma = 0.5 + k = KernelFunction(gamma=gamma, d=2) + + Emb = OptimalPositiveBasis( + d, m, offset=0.2, s=s, b=b, discretization_size=n, B=1000.0, kernel_object=k + ) + + GP = GaussianProcess(d=d, s=s) + xtest = torch.from_numpy(interval(n, d)) + + x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, d))) + + F_true = lambda x: torch.sum(torch.sin(x) ** 2 - 0.1, dim=1).view(-1, 1) + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) + + # Try to plot the basis functions + msqrt = int(np.sqrt(m)) + fig, axs = plt.subplots(msqrt, msqrt, figsize=(15, 7)) + for i in range(m): + f_i = Emb.basis_fun(xtest, i) ## basis function + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + ax = axs[int(i // msqrt), (i % msqrt)] + grid_x, grid_y = np.mgrid[min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j] + grid_z_f = griddata( + (xx, yy), f_i[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=10) + ax.contour(cs, colors="k") + # cbar = fig.colorbar(cs) + # if self.x is not None: + # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") + ax.grid(c="k", ls="-", alpha=0.1) + + plt.savefig("positive.png") + plt.show() + + Emb.fit(x, y) + GP.fit_gp(x, y) + + mu, _ = Emb.mean_std(xtest) + mu_true, _ = GP.mean_std(xtest) + + Emb.visualize_function( + xtest, [F_true, lambda x: GP.mean_std(x)[0], lambda x: Emb.mean_std(x)[0]] + ) + # Emb.visualize_function(xtest,GP.mean_std) + # Emb.visualize_function(xtest,Emb.mean_std) + + # plt.plot(xtest,mu_true,'b--', label = 'GP') + + # plt.plot(x,y,'ro') + # plt.plot(xtest, mu, 'g-', label = 'positive basis ') + # plt.legend() + plt.show() diff --git a/stpy/embeddings/packing_embedding.py b/stpy/embeddings/packing_embedding.py index a08d2a7..ce6c77a 100755 --- a/stpy/embeddings/packing_embedding.py +++ b/stpy/embeddings/packing_embedding.py @@ -10,111 +10,116 @@ class PackingEmbedding(Embedding): - def __init__(self, d, m, kernel_object, interval=[-1, 1], n=100, method='svd'): - self.d = d - self.m = m - self.interval = interval - self.size = self.get_m() - self.kernel_object = kernel_object - - self.kernel = kernel_object.kernel - self.n = n - self.method = method - 
self.construct() - - def construct(self): - xtest = interval_torch(self.n, self.d, offset=[self.interval for _ in range(self.d)]) - y = xtest[:, 0].view(-1, 1) * 0 - - self.new_kernel_object = KernelFunction(kernel_name=self.kernel_object.optkernel, - gamma=self.kernel_object.gamma, d=self.d) - self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx=self.method) - self.GP.fit_gp(xtest, y) - - def basis_fun(self, x, j): - return self.GP.embed(x)[:, j].view(-1, 1) - - def embed(self, x): - return self.GP.embed(x) - - def _derivative_1(self, x): - dphi = batch_jacobian(self.embed, x).transpose(0, 1) - return dphi - - def _derivative_2(self, x): - d2phi = batch_hessian(self.embed, x).transpose(0, 1).transpose(0, 2) - return d2phi - - def derivative_1(self, x): - if self.kernel_object.optkernel == "squared_exponential": - xs = self.GP.xs - M = self.GP.M - derivative = self.kernel_object.derivative_1(xs, x) - res = torch.einsum('ij,kil->kjl', M, derivative) - return res - else: - dphi = self._derivative_1(x) - return dphi - - def derivative_2(self, x): - if self.kernel_object.optkernel == "squared_exponential": - xs = self.GP.xs - M = self.GP.M - derivative = self.kernel_object.derivative_2(xs, x) - res = torch.einsum('ij,kilm->kjlm', M, derivative) - return res - else: - d2phi = self._derivative_2(x) - return d2phi + def __init__(self, d, m, kernel_object, interval=[-1, 1], n=100, method="svd"): + self.d = d + self.m = m + self.interval = interval + self.size = self.get_m() + self.kernel_object = kernel_object + + self.kernel = kernel_object.kernel + self.n = n + self.method = method + self.construct() + + def construct(self): + xtest = interval_torch( + self.n, self.d, offset=[self.interval for _ in range(self.d)] + ) + y = xtest[:, 0].view(-1, 1) * 0 + + self.new_kernel_object = KernelFunction( + kernel_name=self.kernel_object.optkernel, + gamma=self.kernel_object.gamma, + d=self.d, + ) + self.GP = NystromFeatures(self.new_kernel_object, m=self.m, approx=self.method) + self.GP.fit_gp(xtest, y) + + def basis_fun(self, x, j): + return self.GP.embed(x)[:, j].view(-1, 1) + + def embed(self, x): + return self.GP.embed(x) + + def _derivative_1(self, x): + dphi = batch_jacobian(self.embed, x).transpose(0, 1) + return dphi + + def _derivative_2(self, x): + d2phi = batch_hessian(self.embed, x).transpose(0, 1).transpose(0, 2) + return d2phi + + def derivative_1(self, x): + if self.kernel_object.optkernel == "squared_exponential": + xs = self.GP.xs + M = self.GP.M + derivative = self.kernel_object.derivative_1(xs, x) + res = torch.einsum("ij,kil->kjl", M, derivative) + return res + else: + dphi = self._derivative_1(x) + return dphi + + def derivative_2(self, x): + if self.kernel_object.optkernel == "squared_exponential": + xs = self.GP.xs + M = self.GP.M + derivative = self.kernel_object.derivative_2(xs, x) + res = torch.einsum("ij,kilm->kjlm", M, derivative) + return res + else: + d2phi = self._derivative_2(x) + return d2phi if __name__ == "__main__": - from stpy.continuous_processes.kernelized_features import KernelizedFeatures + from stpy.continuous_processes.kernelized_features import KernelizedFeatures - d = 1 - m = 200 - n = 128 - N = 10 + d = 1 + m = 200 + n = 128 + N = 10 - lam = 1. 
+ lam = 1.0 - s = 0.0001 - gamma = 0.1 + s = 0.0001 + gamma = 0.1 - xtest = torch.from_numpy(interval(n, d)) - x = torch.from_numpy(interval(N, d)) + xtest = torch.from_numpy(interval(n, d)) + x = torch.from_numpy(interval(N, d)) - kernel_object = KernelFunction(gamma=gamma) - Emb = PackingEmbedding(d, m, kernel_object=kernel_object, n=256, method='nothing') - print(Emb.GP.M.size()) - GP = KernelizedFeatures(embedding=Emb, m=m, s=s, lam=lam, d=d) - y = GP.sample(x) * 0 - y[5, 0] = 0.5 + kernel_object = KernelFunction(gamma=gamma) + Emb = PackingEmbedding(d, m, kernel_object=kernel_object, n=256, method="nothing") + print(Emb.GP.M.size()) + GP = KernelizedFeatures(embedding=Emb, m=m, s=s, lam=lam, d=d) + y = GP.sample(x) * 0 + y[5, 0] = 0.5 - GP.fit_gp(x, y) - mu, std = GP.mean_std(xtest) + GP.fit_gp(x, y) + mu, std = GP.mean_std(xtest) - der = Emb.derivative_1(xtest)[:, :, 0] - der_comp = Emb._derivative_1(xtest)[:, :, 0] + der = Emb.derivative_1(xtest)[:, :, 0] + der_comp = Emb._derivative_1(xtest)[:, :, 0] - print(torch.norm(der - der_comp)) + print(torch.norm(der - der_comp)) - der = der @ GP.theta_mean() - der_comp = der_comp @ GP.theta_mean() + der = der @ GP.theta_mean() + der_comp = der_comp @ GP.theta_mean() - der2 = Emb.derivative_2(xtest)[:, :, 0, 0] - der2_comp = Emb._derivative_2(xtest)[:, :, 0, 0] + der2 = Emb.derivative_2(xtest)[:, :, 0, 0] + der2_comp = Emb._derivative_2(xtest)[:, :, 0, 0] - print(torch.norm(der2 - der2_comp)) + print(torch.norm(der2 - der2_comp)) - der2 = der2 @ GP.theta_mean() - der2_comp = der2_comp @ GP.theta_mean() + der2 = der2 @ GP.theta_mean() + der2_comp = der2_comp @ GP.theta_mean() - plt.plot(xtest, mu) - plt.plot(xtest, der) - plt.plot(xtest, der_comp, '--') - plt.plot(xtest, der2) - plt.plot(xtest, der2_comp, '--') - plt.plot(x, y, 'bo') - plt.grid() - plt.show() + plt.plot(xtest, mu) + plt.plot(xtest, der) + plt.plot(xtest, der_comp, "--") + plt.plot(xtest, der2) + plt.plot(xtest, der2_comp, "--") + plt.plot(x, y, "bo") + plt.grid() + plt.show() diff --git a/stpy/embeddings/polynomial_embedding.py b/stpy/embeddings/polynomial_embedding.py index eba9a74..5fbd50c 100755 --- a/stpy/embeddings/polynomial_embedding.py +++ b/stpy/embeddings/polynomial_embedding.py @@ -41,174 +41,189 @@ from sklearn.preprocessing import PolynomialFeatures -class CustomEmbedding(): - def __init__(self, d, embedding_function, m, groups=None, quadrature="fixed"): - self.d = d - self.groups = groups - self.embedding_function = embedding_function - self.m = m - self.quadrature = quadrature - - def embed(self, x): - return self.embedding_function(x) - - def get_m(self): - return self.m - - def integral(self, S): - varphi = torch.zeros(size=(self.m, 1)).double() - - if self.quadrature == "fixed": - if S.d == 1: - weights, nodes = S.return_legendre_discretization(n=512) - Z = self.embed(nodes) - varphi = torch.einsum('i,ij->j', weights, Z) - return varphi.view(-1, 1) - elif S.d == 2: - weights, nodes = S.return_legendre_discretization(n=50) - Z = self.embed(nodes) - varphi = torch.einsum('i,ij->j', weights, Z) - return varphi.view(-1, 1) - else: - if S.d == 1: - for i in range(self.m): - Fi = lambda x: self.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1).numpy() - val, status = integrate.quad(Fi, float(S.bounds[0, 0]), float(S.bounds[0, 1])) - varphi[i] = val - elif S.d == 2: - for i in range(self.m): - Fi = lambda x: self.embed(x).view(-1)[i] - integrand = lambda x, y: Fi(torch.Tensor([x, y]).view(1, 2).double()).numpy() - val, status = integrate.dblquad(integrand, 
float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), - lambda x: float(S.bounds[1, 1]), epsabs=1.49e-03, epsrel=1.49e-03) - varphi[i] = val - return varphi - - -class PolynomialEmbedding(): - - def __init__(self, d, p, kappa=1., groups=None, include_bias=True): - self.d = d - self.p = p - self.kappa = kappa - self.groups = groups - self.compute(include_bias=include_bias) - self.include_bias = include_bias - - def compute(self, include_bias=True): - self.poly = PolynomialFeatures(degree=self.p, include_bias=include_bias) - if self.groups is None: - self.poly.fit_transform(np.random.randn(1, self.d)) - self.degrees = torch.from_numpy(self.poly.powers_).double() - self.size = self.degrees.size()[0] - else: - self.degrees = [] - self.size = 0 - self.sizes = [] - for group in self.groups: - self.poly.fit_transform(np.random.randn(1, len(group))) - z = torch.from_numpy(self.poly.powers_).double() - self.degrees.append(z) - self.sizes.append(z.size()[0]) - self.size += z.size()[0] - - def embed_group(self, x, j): - (n, d) = x.size() - x = x.view(n, -1) - Phi = torch.zeros(size=(n, self.sizes[j]), dtype=torch.float64) - group = self.groups[j] - for i in range(n): - y = x[i, :] - z = y.view(1, len(group)) - Phi[i, :] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1) - return Phi - - def get_sub_indices(self, group): - ind = [] - for index, elem in enumerate(self.degrees): - z = torch.sum(elem[0:group - 2]) + torch.sum(elem[group + 1:]) - if (elem[group] >= 0.0) and (z <= 0.): - ind.append(index) - return ind - - def embed(self, x): - (n, d) = x.size() - # zero = torch.pow(x[0,:] * 0, self.degrees) - Phi = torch.zeros(size=(n, self.size), dtype=torch.float64) - - if self.groups is None: - for i in range(n): - y = x[i, :] - Phi[i, :] = torch.prod(torch.pow(y, self.degrees), dim=1) - else: - for i in range(n): - y = x[i, :] - for j, group in enumerate(self.groups): - z = y[group].view(1, len(group)) - start = int(np.sum(self.sizes[0:j])) - end = np.sum(self.sizes[0:j + 1]) - Phi[i, start:end] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1) - return np.sqrt(self.kappa) * Phi - - def derivative_1(self, x): - pass - - def derivative_2(self, x): - pass - - -class ChebyschevEmbedding(): - - - def get_m(self): - return self.m - - def __init__(self, d, p, groups=None, include_bias=True): - self.d = d - self.p = p - self.groups = groups - self.c = np.ones(self.p) - self.poly = cheb.Chebyshev(self.c) - self.size = self.p - self.m = self.p - - def embed(self, x): - out = np.zeros(shape=(int(x.size()[0]), self.p)) - z = None - for p in np.arange(1, self.p + 1, 1): - c = np.ones(p) - if p > 1: - zold = z - z = cheb.chebval(x.numpy(), c) - out[:, p - 1] = (z - zold).reshape(-1) - else: - z = cheb.chebval(x.numpy(), c) - out[:, p - 1] = z.reshape(-1) - return torch.from_numpy(out) - - def derivative_1(self, x): - pass - - def derivative_2(self, x): - pass +class CustomEmbedding: + def __init__(self, d, embedding_function, m, groups=None, quadrature="fixed"): + self.d = d + self.groups = groups + self.embedding_function = embedding_function + self.m = m + self.quadrature = quadrature + + def embed(self, x): + return self.embedding_function(x) + + def get_m(self): + return self.m + + def integral(self, S): + varphi = torch.zeros(size=(self.m, 1)).double() + + if self.quadrature == "fixed": + if S.d == 1: + weights, nodes = S.return_legendre_discretization(n=512) + Z = self.embed(nodes) + varphi = torch.einsum("i,ij->j", weights, Z) + return varphi.view(-1, 1) + elif 
S.d == 2: + weights, nodes = S.return_legendre_discretization(n=50) + Z = self.embed(nodes) + varphi = torch.einsum("i,ij->j", weights, Z) + return varphi.view(-1, 1) + else: + if S.d == 1: + for i in range(self.m): + Fi = ( + lambda x: self.embed(torch.from_numpy(np.array(x)).view(1, -1)) + .view(-1) + .numpy() + ) + val, status = integrate.quad( + Fi, float(S.bounds[0, 0]), float(S.bounds[0, 1]) + ) + varphi[i] = val + elif S.d == 2: + for i in range(self.m): + Fi = lambda x: self.embed(x).view(-1)[i] + integrand = lambda x, y: Fi( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + val, status = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + epsabs=1.49e-03, + epsrel=1.49e-03, + ) + varphi[i] = val + return varphi + + +class PolynomialEmbedding: + + def __init__(self, d, p, kappa=1.0, groups=None, include_bias=True): + self.d = d + self.p = p + self.kappa = kappa + self.groups = groups + self.compute(include_bias=include_bias) + self.include_bias = include_bias + + def compute(self, include_bias=True): + self.poly = PolynomialFeatures(degree=self.p, include_bias=include_bias) + if self.groups is None: + self.poly.fit_transform(np.random.randn(1, self.d)) + self.degrees = torch.from_numpy(self.poly.powers_).double() + self.size = self.degrees.size()[0] + else: + self.degrees = [] + self.size = 0 + self.sizes = [] + for group in self.groups: + self.poly.fit_transform(np.random.randn(1, len(group))) + z = torch.from_numpy(self.poly.powers_).double() + self.degrees.append(z) + self.sizes.append(z.size()[0]) + self.size += z.size()[0] + + def embed_group(self, x, j): + (n, d) = x.size() + x = x.view(n, -1) + Phi = torch.zeros(size=(n, self.sizes[j]), dtype=torch.float64) + group = self.groups[j] + for i in range(n): + y = x[i, :] + z = y.view(1, len(group)) + Phi[i, :] = torch.prod(torch.pow(z, self.degrees[j]), dim=1).view(-1) + return Phi + + def get_sub_indices(self, group): + ind = [] + for index, elem in enumerate(self.degrees): + z = torch.sum(elem[0 : group - 2]) + torch.sum(elem[group + 1 :]) + if (elem[group] >= 0.0) and (z <= 0.0): + ind.append(index) + return ind + + def embed(self, x): + (n, d) = x.size() + # zero = torch.pow(x[0,:] * 0, self.degrees) + Phi = torch.zeros(size=(n, self.size), dtype=torch.float64) + + if self.groups is None: + for i in range(n): + y = x[i, :] + Phi[i, :] = torch.prod(torch.pow(y, self.degrees), dim=1) + else: + for i in range(n): + y = x[i, :] + for j, group in enumerate(self.groups): + z = y[group].view(1, len(group)) + start = int(np.sum(self.sizes[0:j])) + end = np.sum(self.sizes[0 : j + 1]) + Phi[i, start:end] = torch.prod( + torch.pow(z, self.degrees[j]), dim=1 + ).view(-1) + return np.sqrt(self.kappa) * Phi + + def derivative_1(self, x): + pass + + def derivative_2(self, x): + pass + + +class ChebyschevEmbedding: + + def get_m(self): + return self.m + + def __init__(self, d, p, groups=None, include_bias=True): + self.d = d + self.p = p + self.groups = groups + self.c = np.ones(self.p) + self.poly = cheb.Chebyshev(self.c) + self.size = self.p + self.m = self.p + + def embed(self, x): + out = np.zeros(shape=(int(x.size()[0]), self.p)) + z = None + for p in np.arange(1, self.p + 1, 1): + c = np.ones(p) + if p > 1: + zold = z + z = cheb.chebval(x.numpy(), c) + out[:, p - 1] = (z - zold).reshape(-1) + else: + z = cheb.chebval(x.numpy(), c) + out[:, p - 1] = z.reshape(-1) + return torch.from_numpy(out) + + def derivative_1(self, x): + 
pass + + def derivative_2(self, x): + pass if __name__ == "__main__": - d = 2 - p = 4 - emb = PolynomialEmbedding(d, p, groups=[[0], [1]]) - x1 = torch.randn(size=(1, d), dtype=torch.float64) - x2 = torch.randn(size=(1, d), dtype=torch.float64) - xc = torch.cat((x1, x2)) - - print(emb.embed(x1).size()) - print(emb.embed(x2).size()) - print(emb.embed(xc).size()) - - print("--------") - emb = PolynomialEmbedding(d, p) - print(emb.get_sub_indices(0)) + d = 2 + p = 4 + emb = PolynomialEmbedding(d, p, groups=[[0], [1]]) + x1 = torch.randn(size=(1, d), dtype=torch.float64) + x2 = torch.randn(size=(1, d), dtype=torch.float64) + xc = torch.cat((x1, x2)) + + print(emb.embed(x1).size()) + print(emb.embed(x2).size()) + print(emb.embed(xc).size()) + + print("--------") + emb = PolynomialEmbedding(d, p) + print(emb.get_sub_indices(0)) # d = 1 # emb = ChebyschevEmbedding(d,3) # x1 = torch.randn(size = (1,d), dtype = torch.float64) diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index 2e41df1..340c094 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -13,194 +13,243 @@ class PositiveEmbedding(Embedding): - def __init__(self, d, m, kernel_object: Optional[KernelFunction]=None, interval=(-1, 1), B=1, b=0, s=0.001, offset=0.): - """ - - Parameters - ---------- - d - Dimension of the embedding - m - Number of basis functions - b, optional - Minimal value of the rate function, by default 0 - B, optional - Maximal value of the rate function, by default 1 - """ - self.d = d - """ Dimension of the embedding """ - self.m = m - """ Number of basis functions """ - self.b = b - """ Minimal value of the rate function """ - self.size = self.get_m() - """ Number of basis functions times number of dimensions """ - self.interval = interval - if kernel_object is None: - #self.kernel_object = KernelFunction() - #self.kernel = lambda x, y: self.kernel_object.kernel(x, y) - self.kernel = None - else: - self.kernel_object = kernel_object - self.kernel = self.kernel_object.kernel - self.B = B - self.s = s - self.offset = offset - - self.interval = (self.interval[0] - offset, self.interval[1] + offset) - - self.borel_set = BorelSet(d=1, bounds=torch.Tensor([[self.interval[0], self.interval[1]]]).double()) - self.mu = None - self.precomp = False - self.procomp_integrals = {} - - def get_size(self): - return self.m ** self.d - - def integral(self, S): - pass - - def basis_fun(self, x, j): - """ - Return the value of basis function \phi_j(x) - - :param x: double, need to be in the interval - :param j: integer, index of hat functions, 0 <= j <= m-1 - :return: \phi_j(x) - """ - pass - - def get_constraints(self): - s = self.m ** self.d - l = torch.from_numpy(np.full(s, self.b)) - u = torch.from_numpy(np.full(s, self.B)) - Lambda = torch.from_numpy(np.identity(s)) - return (l, Lambda, u) - - def cov(self, inverse=False): - r"""Should return $\Gamma^T = \sqrt{V^{-1} K V^{-1}}^T$ - - $\sqrt{(V^TV)^* \cdot K}$ where $V_{ij} = \phi_i(t_j)$ and - $K_{ij} = k(t_i, t_j)$ and the $t_i$ are equally spaced grid points - in the cartesian product set $i^d$ where i is `self.interval` - - """ - if self.precomp == False: - dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m - t = self.interval[0] + torch.linspace(0, self.m - 1, self.m) * dm - - if self.d == 1: - t = t.view(-1, 1).double() - elif self.d == 2: - t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double() - elif self.d == 3: - t = torch.from_numpy(cartesian([t.numpy(), 
t.numpy(), t.numpy()])).double() - if self.kernel is not None: - self.Gamma = self.kernel(t, t) - Z = self.embed_internal(t) - M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) - self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) - self.Gamma_half = torch.from_numpy( - np.real(scipy.linalg.sqrtm(self.Gamma.numpy() + 1e-5 * (self.s ** 2) * np.eye(self.Gamma.size()[0])))) - self.Gamma_half = self.M @ self.Gamma_half - self.invGamma_half = torch.pinverse(self.Gamma_half) - else: - self.Gamma_half = torch.eye(self.m).double() - self.precomp = True - else: - pass - - if inverse == True: - return self.Gamma_half, self.invGamma_half - else: - return self.Gamma_half - - def embed_internal(self, x): - """ Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$. - """ - if self.d == 1: - out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) - for j in range(self.m): - out[:, j] = self.basis_fun(x, j).view(-1) - return out - - elif self.d == 2: - phi_1 = torch.cat([self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], dim=1) - phi_2 = torch.cat([self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], dim=1) - n = x.size()[0] - out = [] - for i in range(n): - out.append(torch.from_numpy(np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy())).view(1, -1)) - out = torch.cat(out, dim=0) - return out - elif self.d == 3: - phi_1 = torch.cat([self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], dim=1) - phi_2 = torch.cat([self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], dim=1) - phi_3 = torch.cat([self.basis_fun(x[:, 2].view(-1, 1), j) for j in range(0, self.m)], dim=1) - - n = x.size()[0] - out = [] - for i in range(n): - out.append( - torch.from_numpy(np.kron(phi_3[i, :], np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()))).view(1, - -1)) - out = torch.cat(out, dim=0) - return out - - def fit(self, x, y, already_embeded=False): - m = self.get_m() - - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov() - - if already_embeded == False: - Phi = self.embed(x).numpy() - else: - Phi = x.numpy() - - xi = cp.Variable(m) - obj = cp.Minimize(self.s ** 2 * cp.norm2(xi) + cp.sum_squares(Phi @ xi - y.numpy().reshape(-1))) - - constraints = [] - Lambda = Lambda @ Gamma_half.numpy() - if not np.all(l == -np.inf): - constraints.append(Lambda[l != -np.inf] @ xi >= l[l != -np.inf]) - if not np.all(u == np.inf): - constraints.append(Lambda[u != np.inf] @ xi <= u[u != np.inf]) - - prob = cp.Problem(obj, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, - verbose=False, mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual}) - - if prob.status != "optimal": - raise ValueError('cannot compute the mode') - - mode = xi.value - self.mode = torch.from_numpy(mode).view(-1, 1) - self.mu = self.mode - return mode - - def embed(self, x): - r"""Calculates $\Phi(x)^T = \phi(x)^T \Gamma^T$""" - Gamma_half = self.cov() - return self.embed_internal(x) @ Gamma_half - - def mean(self, xtest): - embeding = self.embed(xtest) - mean = embeding @ self.mu - return mean - - def mean_std(self, xtest): - embeding = self.embed(xtest) - mean = embeding @ self.mu - return mean, None - - def sample_theta(self): - self.mu = torch.randn(size=(self.get_m(), 1)) - return self.mu - - def sample(self, xtest, size=1): - return self.embed(xtest) @ self.sample_theta() - - def get_m(self): - return self.m ** self.d + def __init__( + self, + d, + m, + kernel_object: Optional[KernelFunction] = None, + interval=(-1, 1), + B=1, + b=0, + 
s=0.001, + offset=0.0, + ): + """ + + Parameters + ---------- + d + Dimension of the embedding + m + Number of basis functions + b, optional + Minimal value of the rate function, by default 0 + B, optional + Maximal value of the rate function, by default 1 + """ + self.d = d + """ Dimension of the embedding """ + self.m = m + """ Number of basis functions """ + self.b = b + """ Minimal value of the rate function """ + self.size = self.get_m() + """ Number of basis functions times number of dimensions """ + self.interval = interval + if kernel_object is None: + # self.kernel_object = KernelFunction() + # self.kernel = lambda x, y: self.kernel_object.kernel(x, y) + self.kernel = None + else: + self.kernel_object = kernel_object + self.kernel = self.kernel_object.kernel + self.B = B + self.s = s + self.offset = offset + + self.interval = (self.interval[0] - offset, self.interval[1] + offset) + + self.borel_set = BorelSet( + d=1, bounds=torch.Tensor([[self.interval[0], self.interval[1]]]).double() + ) + self.mu = None + self.precomp = False + self.procomp_integrals = {} + + def get_size(self): + return self.m**self.d + + def integral(self, S): + pass + + def basis_fun(self, x, j): + """ + Return the value of basis function \phi_j(x) + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: \phi_j(x) + """ + pass + + def get_constraints(self): + s = self.m**self.d + l = torch.from_numpy(np.full(s, self.b)) + u = torch.from_numpy(np.full(s, self.B)) + Lambda = torch.from_numpy(np.identity(s)) + return (l, Lambda, u) + + def cov(self, inverse=False): + r"""Should return $\Gamma^T = \sqrt{V^{-1} K V^{-1}}^T$ + + $\sqrt{(V^TV)^* \cdot K}$ where $V_{ij} = \phi_i(t_j)$ and + $K_{ij} = k(t_i, t_j)$ and the $t_i$ are equally spaced grid points + in the cartesian product set $i^d$ where i is `self.interval` + + """ + if self.precomp == False: + dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m + t = self.interval[0] + torch.linspace(0, self.m - 1, self.m) * dm + + if self.d == 1: + t = t.view(-1, 1).double() + elif self.d == 2: + t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double() + elif self.d == 3: + t = torch.from_numpy( + cartesian([t.numpy(), t.numpy(), t.numpy()]) + ).double() + if self.kernel is not None: + self.Gamma = self.kernel(t, t) + Z = self.embed_internal(t) + M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) + self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) + self.Gamma_half = torch.from_numpy( + np.real( + scipy.linalg.sqrtm( + self.Gamma.numpy() + + 1e-5 * (self.s**2) * np.eye(self.Gamma.size()[0]) + ) + ) + ) + self.Gamma_half = self.M @ self.Gamma_half + self.invGamma_half = torch.pinverse(self.Gamma_half) + else: + self.Gamma_half = torch.eye(self.m).double() + self.precomp = True + else: + pass + + if inverse == True: + return self.Gamma_half, self.invGamma_half + else: + return self.Gamma_half + + def embed_internal(self, x): + """Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$.""" + if self.d == 1: + out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) + for j in range(self.m): + out[:, j] = self.basis_fun(x, j).view(-1) + return out + + elif self.d == 2: + phi_1 = torch.cat( + [self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], + dim=1, + ) + phi_2 = torch.cat( + [self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], + dim=1, + ) + n = x.size()[0] + out = [] + for i in range(n): + out.append( + 
torch.from_numpy( + np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()) + ).view(1, -1) + ) + out = torch.cat(out, dim=0) + return out + elif self.d == 3: + phi_1 = torch.cat( + [self.basis_fun(x[:, 0].view(-1, 1), j) for j in range(0, self.m)], + dim=1, + ) + phi_2 = torch.cat( + [self.basis_fun(x[:, 1].view(-1, 1), j) for j in range(0, self.m)], + dim=1, + ) + phi_3 = torch.cat( + [self.basis_fun(x[:, 2].view(-1, 1), j) for j in range(0, self.m)], + dim=1, + ) + + n = x.size()[0] + out = [] + for i in range(n): + out.append( + torch.from_numpy( + np.kron( + phi_3[i, :], + np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()), + ) + ).view(1, -1) + ) + out = torch.cat(out, dim=0) + return out + + def fit(self, x, y, already_embeded=False): + m = self.get_m() + + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov() + + if already_embeded == False: + Phi = self.embed(x).numpy() + else: + Phi = x.numpy() + + xi = cp.Variable(m) + obj = cp.Minimize( + self.s**2 * cp.norm2(xi) + cp.sum_squares(Phi @ xi - y.numpy().reshape(-1)) + ) + + constraints = [] + Lambda = Lambda @ Gamma_half.numpy() + if not np.all(l == -np.inf): + constraints.append(Lambda[l != -np.inf] @ xi >= l[l != -np.inf]) + if not np.all(u == np.inf): + constraints.append(Lambda[u != np.inf] @ xi <= u[u != np.inf]) + + prob = cp.Problem(obj, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual}, + ) + + if prob.status != "optimal": + raise ValueError("cannot compute the mode") + + mode = xi.value + self.mode = torch.from_numpy(mode).view(-1, 1) + self.mu = self.mode + return mode + + def embed(self, x): + r"""Calculates $\Phi(x)^T = \phi(x)^T \Gamma^T$""" + Gamma_half = self.cov() + return self.embed_internal(x) @ Gamma_half + + def mean(self, xtest): + embeding = self.embed(xtest) + mean = embeding @ self.mu + return mean + + def mean_std(self, xtest): + embeding = self.embed(xtest) + mean = embeding @ self.mu + return mean, None + + def sample_theta(self): + self.mu = torch.randn(size=(self.get_m(), 1)) + return self.mu + + def sample(self, xtest, size=1): + return self.embed(xtest) @ self.sample_theta() + + def get_m(self): + return self.m**self.d diff --git a/stpy/embeddings/random_nn.py b/stpy/embeddings/random_nn.py index bf5c57a..b94ae50 100755 --- a/stpy/embeddings/random_nn.py +++ b/stpy/embeddings/random_nn.py @@ -5,184 +5,216 @@ class RandomMap(nn.Module): - def __init__(self, d, m, fun, output=2): - super(RandomMap, self).__init__() - self.W = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.W.requires_grad_(True) - self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.w.requires_grad_(True) - self.b = torch.normal(mean=torch.zeros(output, dtype=torch.float64), std=1. 
/ np.sqrt(d * m) ** 2) - self.b.requires_grad_(True) - self.fun = fun - self.output = output - - def map(self, x): - y = self.fun(torch.mm(self.W, torch.t(x))) - return y - - def forward(self, x): - z = self.map(x) - z = torch.mm(torch.t(z), self.w) - return z - - def get_params(self): - return [self.W, self.w] - - def get_params_last(self): - return [self.w] - - def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1): - criterion = nn.MSELoss() - - import torch.optim as optim - optimizer = optim.SGD([self.W, self.w], lr=lr) - - batch_size = 100 - - for i in range(epochs): - for j in range(x.size()[0] // batch_size): - optimizer.zero_grad() # zero the gradient buffers - output = self.forward(x[j * batch_size:(j + 1) * batch_size]) - loss = criterion(output, y[j * batch_size:(j + 1) * batch_size]) - loss.backward(retain_graph=True) - optimizer.step() # Does the update - - if verbose == True or i % verbose == 0: - output = self.forward(x) - loss_full = criterion(output, y) - print(i, loss_full) - optimizer.step() # Does the update - - def fit_map_lasso(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1, l1=0.1): - criterion = nn.MSELoss() - - import torch.optim as optim - optimizer = optim.SGD([self.W, self.w], lr=lr) - - batch_size = 100 - - for i in range(epochs): - for j in range(x.size()[0] // batch_size): - optimizer.zero_grad() # zero the gradient buffers - output = self.forward(x[j * batch_size:(j + 1) * batch_size]) - loss = criterion(output, y[j * batch_size:(j + 1) * batch_size]) + l1 * torch.norm(self.W, 2) - loss.backward(retain_graph=True) - optimizer.step() # Does the update - - if verbose == True or i % verbose == 0: - output = self.forward(x) - loss_full = criterion(output, y) - print(i, loss_full) - optimizer.step() # Does the update - - def loss(self, x, y): - criterion = nn.MSELoss() - output = self.forward(x) - loss = criterion(output, y) - - return loss - - def fit_last_layer(self): - # same as before but different parameters - pass + def __init__(self, d, m, fun, output=2): + super(RandomMap, self).__init__() + self.W = torch.normal( + mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2 + ) + self.W.requires_grad_(True) + self.w = torch.normal( + mean=torch.zeros(m, output, dtype=torch.float64), + std=1.0 / np.sqrt(d * m) ** 2, + ) + self.w.requires_grad_(True) + self.b = torch.normal( + mean=torch.zeros(output, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2 + ) + self.b.requires_grad_(True) + self.fun = fun + self.output = output + + def map(self, x): + y = self.fun(torch.mm(self.W, torch.t(x))) + return y + + def forward(self, x): + z = self.map(x) + z = torch.mm(torch.t(z), self.w) + return z + + def get_params(self): + return [self.W, self.w] + + def get_params_last(self): + return [self.w] + + def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1): + criterion = nn.MSELoss() + + import torch.optim as optim + + optimizer = optim.SGD([self.W, self.w], lr=lr) + + batch_size = 100 + + for i in range(epochs): + for j in range(x.size()[0] // batch_size): + optimizer.zero_grad() # zero the gradient buffers + output = self.forward(x[j * batch_size : (j + 1) * batch_size]) + loss = criterion(output, y[j * batch_size : (j + 1) * batch_size]) + loss.backward(retain_graph=True) + optimizer.step() # Does the update + + if verbose == True or i % verbose == 0: + output = self.forward(x) + loss_full = criterion(output, y) + print(i, loss_full) + optimizer.step() # Does the update + + def fit_map_lasso(self, x, y, 
epochs=1000, verbose=False, reg=0.1, lr=0.1, l1=0.1): + criterion = nn.MSELoss() + + import torch.optim as optim + + optimizer = optim.SGD([self.W, self.w], lr=lr) + + batch_size = 100 + + for i in range(epochs): + for j in range(x.size()[0] // batch_size): + optimizer.zero_grad() # zero the gradient buffers + output = self.forward(x[j * batch_size : (j + 1) * batch_size]) + loss = criterion( + output, y[j * batch_size : (j + 1) * batch_size] + ) + l1 * torch.norm(self.W, 2) + loss.backward(retain_graph=True) + optimizer.step() # Does the update + + if verbose == True or i % verbose == 0: + output = self.forward(x) + loss_full = criterion(output, y) + print(i, loss_full) + optimizer.step() # Does the update + + def loss(self, x, y): + criterion = nn.MSELoss() + output = self.forward(x) + loss = criterion(output, y) + + return loss + + def fit_last_layer(self): + # same as before but different parameters + pass class SpecificMap(RandomMap): - def __init__(self, d, m, fun, map, output=2): - super(SpecificMap, self).__init__(d, m, fun, output=2) - self.map = map + def __init__(self, d, m, fun, map, output=2): + super(SpecificMap, self).__init__(d, m, fun, output=2) + self.map = map - def forward(self, x): - z = self.map(x) - z = torch.mm(torch.t(z), self.w) - return z + def forward(self, x): + z = self.map(x) + z = torch.mm(torch.t(z), self.w) + return z - def get_params(self): - return [self.w] + def get_params(self): + return [self.w] def RandomMapStacked(RandomMap): - def __init__(self, d, m, fun, output=2): - super(RandomMap, self).__init__() - self.W = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.W.requires_grad_(True) - self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.w.requires_grad_(True) - self.b = torch.normal(mean=torch.zeros(m, 1, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.b.requires_grad_(True) - self.fun = fun - self.output = output - - def map(self, x): - y = self.fun(torch.mm(self.W, torch.t(x)) + self.b) - return y - - def fit_map(self, x, y): - pass + def __init__(self, d, m, fun, output=2): + super(RandomMap, self).__init__() + self.W = torch.normal( + mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2 + ) + self.W.requires_grad_(True) + self.w = torch.normal( + mean=torch.zeros(m, output, dtype=torch.float64), + std=1.0 / np.sqrt(d * m) ** 2, + ) + self.w.requires_grad_(True) + self.b = torch.normal( + mean=torch.zeros(m, 1, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2 + ) + self.b.requires_grad_(True) + self.fun = fun + self.output = output + + def map(self, x): + y = self.fun(torch.mm(self.W, torch.t(x)) + self.b) + return y + + def fit_map(self, x, y): + pass class RandomOrthogonalMap(RandomMap): - def __init__(self, d, m, fun, output=1): - super(RandomMap, self).__init__() - self.m = m + def __init__(self, d, m, fun, output=1): + super(RandomMap, self).__init__() + self.m = m - self.R = torch.normal(mean=torch.zeros(m, d, dtype=torch.float64), std=1. / np.sqrt(d * m) ** 2) - self.R = nn.init.orthogonal_(self.R) - self.R.requires_grad_(True) + self.R = torch.normal( + mean=torch.zeros(m, d, dtype=torch.float64), std=1.0 / np.sqrt(d * m) ** 2 + ) + self.R = nn.init.orthogonal_(self.R) + self.R.requires_grad_(True) - self.w = torch.normal(mean=torch.zeros(m, output, dtype=torch.float64), std=1. 
/ np.sqrt(d * m) ** 2) - self.w.requires_grad_(True) + self.w = torch.normal( + mean=torch.zeros(m, output, dtype=torch.float64), + std=1.0 / np.sqrt(d * m) ** 2, + ) + self.w.requires_grad_(True) - self.fun = fun - self.output = output + self.fun = fun + self.output = output - def map(self, x): - y = self.fun(torch.mm(self.R, torch.t(x))) - return y + def map(self, x): + y = self.fun(torch.mm(self.R, torch.t(x))) + return y - def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1): - criterion = nn.MSELoss() + def fit_map(self, x, y, epochs=1000, verbose=False, reg=0.1, lr=0.1): + criterion = nn.MSELoss() - import torch.optim as optim + import torch.optim as optim - optimizer = optim.SGD([self.R, self.w], lr=lr) - orth_loss = torch.norm(torch.mm(self.R, torch.t(self.R)) - torch.eye(self.m, self.m, dtype=torch.float64)) ** 2 + optimizer = optim.SGD([self.R, self.w], lr=lr) + orth_loss = ( + torch.norm( + torch.mm(self.R, torch.t(self.R)) + - torch.eye(self.m, self.m, dtype=torch.float64) + ) + ** 2 + ) - batch_size = 100 + batch_size = 100 - for i in range(epochs): - for j in range(x.size()[0] // batch_size): - optimizer.zero_grad() # zero the gradient buffers - output = self.forward(x[j * batch_size:(j + 1) * batch_size]) - loss = criterion(output, y[j * batch_size:(j + 1) * batch_size]) + reg * orth_loss - loss.backward(retain_graph=True) - optimizer.step() # Does the update + for i in range(epochs): + for j in range(x.size()[0] // batch_size): + optimizer.zero_grad() # zero the gradient buffers + output = self.forward(x[j * batch_size : (j + 1) * batch_size]) + loss = ( + criterion(output, y[j * batch_size : (j + 1) * batch_size]) + + reg * orth_loss + ) + loss.backward(retain_graph=True) + optimizer.step() # Does the update - if verbose == True or i % verbose == 0: - output = self.forward(x) - loss_full = criterion(output, y) + reg * orth_loss - print(i, loss_full) + if verbose == True or i % verbose == 0: + output = self.forward(x) + loss_full = criterion(output, y) + reg * orth_loss + print(i, loss_full) -class RandomNestedMap(): +class RandomNestedMap: - def __init__(self): - pass + def __init__(self): + pass if __name__ == "__main__": - ridge = lambda x: torch.tanh(x) + ridge = lambda x: torch.tanh(x) - N = 1000 - d = 10 - m = 2 + N = 1000 + d = 10 + m = 2 - NetOriginal = RandomMap(d, m, ridge) + NetOriginal = RandomMap(d, m, ridge) - x = 10 * torch.normal(mean=torch.zeros(N, d, dtype=torch.float64) + 2, std=100.) 
- y = NetOriginal.forward(x) + x = 10 * torch.normal(mean=torch.zeros(N, d, dtype=torch.float64) + 2, std=100.0) + y = NetOriginal.forward(x) - Net = RandomMap(d, m, ridge) - Net.fit_map(x, y) + Net = RandomMap(d, m, ridge) + Net.fit_map(x, y) diff --git a/stpy/embeddings/transformations.py b/stpy/embeddings/transformations.py index 84f58a0..75cc74b 100755 --- a/stpy/embeddings/transformations.py +++ b/stpy/embeddings/transformations.py @@ -9,66 +9,68 @@ class Transformation(Embedding): - def __init__(self): - pass + def __init__(self): + pass - def embed(self, x): - pass + def embed(self, x): + pass - def linear_embedding(self): - embed = lambda x: x - return embed + def linear_embedding(self): + embed = lambda x: x + return embed - def create_polynomial_embeding(self, degree, d, kappa=1., bias=False): - """ - create polynomial embeding + def create_polynomial_embeding(self, degree, d, kappa=1.0, bias=False): + """ + create polynomial embeding - :param degree: - :param d: - :return: - """ - m = int(comb(degree + d - 1, degree - 1)) + int(bias) - poly = PolynomialFeatures(degree, include_bias=bias) - embed = lambda x: kappa * torch.from_numpy(poly.fit_transform(x.numpy())) - return embed, m - return (nodes, weights) + :param degree: + :param d: + :return: + """ + m = int(comb(degree + d - 1, degree - 1)) + int(bias) + poly = PolynomialFeatures(degree, include_bias=bias) + embed = lambda x: kappa * torch.from_numpy(poly.fit_transform(x.numpy())) + return embed, m + return (nodes, weights) - def embed(self, x): - (times, d) = tuple(x.size()) - # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) - z = torch.zeros(self.m, times, dtype=x.dtype) - q = torch.mm(self.W[:, 0:d], torch.t(x)) - z[0:int(self.m / 2), :] = torch.cos(q) - z[int(self.m / 2):self.m, :] = torch.sin(q) - return torch.t(z) + def embed(self, x): + (times, d) = tuple(x.size()) + # z = torch.from_numpy(np.zeros(shape=(self.m, times),dtype=x.dtype)) + z = torch.zeros(self.m, times, dtype=x.dtype) + q = torch.mm(self.W[:, 0:d], torch.t(x)) + z[0 : int(self.m / 2), :] = torch.cos(q) + z[int(self.m / 2) : self.m, :] = torch.sin(q) + return torch.t(z) - def create_fourier_embeding(self, cutoff, d, domain, bias=False): - self.m = 2 * cutoff - 2 * int(bias) - self.d = d - omegas = np.arange(int(bias), cutoff, 1) * 2. * np.pi / (2 * domain) - print(omegas) - v = [omegas for omega in range(self.d)] - self.W = torch.from_numpy(helper.cartesian(v)) - embed = lambda x: self.embed(x) - return embed, self.m + def create_fourier_embeding(self, cutoff, d, domain, bias=False): + self.m = 2 * cutoff - 2 * int(bias) + self.d = d + omegas = np.arange(int(bias), cutoff, 1) * 2.0 * np.pi / (2 * domain) + print(omegas) + v = [omegas for omega in range(self.d)] + self.W = torch.from_numpy(helper.cartesian(v)) + embed = lambda x: self.embed(x) + return embed, self.m - def create_cosine_embeding(self, cutoff, d, domain): - self.m = cutoff - self.d = d - omegas = np.arange(0, cutoff, 1) * 2. 
* np.pi / (2 * domain) - print(omegas) - v = [omegas for omega in range(self.d)] - self.W = torch.from_numpy(helper.cartesian(v)) - embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x)))) - return embed, self.m + def create_cosine_embeding(self, cutoff, d, domain): + self.m = cutoff + self.d = d + omegas = np.arange(0, cutoff, 1) * 2.0 * np.pi / (2 * domain) + print(omegas) + v = [omegas for omega in range(self.d)] + self.W = torch.from_numpy(helper.cartesian(v)) + embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x)))) + return embed, self.m - def create_cosine_power_embeding(self, cutoff, d, domain): - self.m = cutoff + 1 - self.d = d - print(np.logspace(0, cutoff, num=cutoff + 1, base=2)) - omegas = np.logspace(0, cutoff, num=cutoff + 1, base=2) * 2. * np.pi / (2 * domain) - print(omegas) - v = [omegas for omega in range(self.d)] - self.W = torch.from_numpy(helper.cartesian(v)) - embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x)))) - return embed, self.m + def create_cosine_power_embeding(self, cutoff, d, domain): + self.m = cutoff + 1 + self.d = d + print(np.logspace(0, cutoff, num=cutoff + 1, base=2)) + omegas = ( + np.logspace(0, cutoff, num=cutoff + 1, base=2) * 2.0 * np.pi / (2 * domain) + ) + print(omegas) + v = [omegas for omega in range(self.d)] + self.W = torch.from_numpy(helper.cartesian(v)) + embed = lambda x: torch.t(torch.cos(torch.mm(self.W[:, 0:d], torch.t(x)))) + return embed, self.m diff --git a/stpy/embeddings/weighted_embedding.py b/stpy/embeddings/weighted_embedding.py index 0c03c99..6ff5a85 100644 --- a/stpy/embeddings/weighted_embedding.py +++ b/stpy/embeddings/weighted_embedding.py @@ -5,11 +5,7 @@ class WeightedEmbedding(Embedding): - def __init__(self, - embedding: Embedding, - weights = None, - weight_function = None - ): + def __init__(self, embedding: Embedding, weights=None, weight_function=None): self.base_embedding = embedding self.m = self.base_embedding.get_m() self.weights = weights @@ -29,8 +25,3 @@ def embed(self, xtest): return Phi @ np.diag(self.weights) else: return Phi @ np.diag(self.weight_function(self.base_embedding)) - - - - - diff --git a/stpy/estimator.py b/stpy/estimator.py index ec00812..d107bf4 100755 --- a/stpy/estimator.py +++ b/stpy/estimator.py @@ -12,632 +12,902 @@ from stpy.helpers import helper from stpy.optim.custom_optimizers import bisection + class Estimator(ABC): - def fit(self): - pass - - @abstractmethod - def ucb(self, x): - pass - - @abstractmethod - def lcb(self, x): - pass - - def load_data(self,d): - self.x = d[0] - self.y = d[1] - - def log_marginal(self, kernel, X, weight): - func = kernel.get_kernel() - K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s - L = torch.linalg.cholesky(K) - logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) * weight - alpha = torch.cholesky_solve(self.y, L) - logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet - logprob = -logprob - return logprob - - def optimize_params_general(self, params={}, restarts=2, - optimizer="pymanopt", maxiter=1000, - mingradnorm=1e-4, regularizer_func=None, - verbose=False, scale=1., weight=1., save = False, - save_name = 'model.np', parallel = False, cores = None): - """ - - :param params: - :param restarts: - :param optimizer: - :param maxiter: - :param mingradnorm: - :param regularizer_func: - :param verbose: - :return: - """ - manifolds = [] - bounds = [] - init_values = [] - - for key, dict_params in params.items(): - for var_name, value in 
dict_params.items(): - init_value, manifold, bound = value - manifolds.append(manifold) - bounds.append(bound) - init_values.append(init_value) - - if optimizer == "pymanopt": - - manifold = Product(tuple(manifolds)) - - @pymanopt.function.pytorch(manifold) - def cost(*args): - # print (args) - input_dict = {} - i = 0 - for key, dict_params in params.items(): - small_param = {} - for var_name, value in dict_params.items(): - small_param[var_name] = args[i] - i = i + 1 - input_dict[key] = small_param - - if regularizer_func is not None: - f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(args) - else: - f = self.log_marginal(self.kernel_object, input_dict, weight) - return f - - problem = pymanopt.Problem(manifold, cost=cost) - solver = SteepestDescent(verbosity = verbose , max_iterations=maxiter, min_gradient_norm=mingradnorm) - - # get initial point - objective_values = [] - objective_params = [] - - for rep in range(restarts): - x_init = [] - for index, man in enumerate(manifolds): - if init_values[index] is None: - x_sub = man.random_point() * scale - else: - x_sub = np.array([init_values[index]]) - x_init.append(x_sub) - # try: - res = solver.run(problem, initial_point=x_init) - - objective_params.append(res.point) - objective_values.append(res.cost)#log['final_values']['f(x)']) - # except Exception as e: - # print (e) - # print ("Optimization restart failed:", x_init) - # pick the smallest objective - best_index = np.argmin(objective_values) - x_opt = [torch.from_numpy(j) for j in objective_params[best_index]] - - elif optimizer == "scipy": - cost_numpy = lambda x: cost(x).detach.numpy() - egrad_numpy = lambda x: egrad(x).detach().numpy() - - elif optimizer == "bisection": - - def cost(x): - input_dict = self.kernel_object.params_dict - counter = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - input_dict[key][var_name] = x - counter += 1 - - if regularizer_func is not None: - f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(x) - else: - f = self.log_marginal(self.kernel_object, input_dict, weight) - return f - - a,b = bounds[0] - x_opt = [bisection(cost,a,b,100)] - - elif optimizer == "pytorch-minimize": - var_names = [] - dims = [0,] - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - init_value, manifold, bound = value - - manifolds.append(manifold) - bounds.append(bound) - init_values.append(init_value) - var_names.append(var_name) - dims.append(manifold.dim) - - dims = np.cumsum(dims).astype(int) - - def cost(x): - input_dict = self.kernel_object.params_dict - counter = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - if key != "likelihood": - input_dict[key][var_name] = x[dims[counter]:dims[counter+1]] - else: - self.s = x[dims[counter]:dims[counter+1]] - counter += 1 - - if regularizer_func is not None: - f = self.log_marginal(self.kernel_object, input_dict, weight) + regularizer_func(x) - else: - f = self.log_marginal(self.kernel_object, input_dict, weight) - return f - - objective_values = [] - objective_params = [] - x_opt = [] - - dim = dims[-1] - self.prepared_log_marginal = False - for rep in range(restarts): - #try: - if init_values[0] is None: - x_init = torch.randn(size=(dim, 1)).double().view(-1)**2 * scale - else: - x_init = init_values[0](dim) - - if bounds[0] is None: - res = minimize_torch(cost, x_init, method='l-bfgs', tol=1e-10, disp=verbose + 1, - options={'max_iter': maxiter, 
'gtol':mingradnorm}) - objective_params.append(res.x) - objective_values.append(res.fun) - else: - print ("Constrained optimization with bounds", bounds[0]) - res = minimize(cost, x_init.numpy(), backend='torch', method='L-BFGS-B', - bounds=bounds[0], precision='float64', tol=1e-8, - options={'ftol': 1e-10, - 'gtol': mingradnorm, 'eps': 1e-08, - 'maxfun': 15000, 'maxiter': maxiter, - 'maxls': 20, 'disp' : verbose + 1}) - - objective_params.append(torch.from_numpy(res.x)) - objective_values.append(torch.from_numpy(res.fun)) - #except Exception as e: - # print(e) - # save models - - if save: - vals = {'params': objective_params, - 'evidence':objective_values, - 'repeats':restarts, - 'dim':dims, - 'param_names':params} - - with open(save_name, 'wb') as f: - pickle.dump(vals, f) - - - best_index = np.argmin(objective_values) - - counter = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - x_opt.append(objective_params[best_index][dims[counter]:dims[counter+1]]) - counter += 1 - - elif optimizer == "discrete": - values = [] - configurations = manifolds[0] - for config in manifolds[0]: - values.append(cost(config)) - - best_index = np.argmin(values) - x_opt = [configurations[best_index]] - else: - raise AssertionError("Optimizer not implemented.") - - # put back into default dic - i = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - if key == "likelihood": - self.s = x_opt[i] - - else: - self.kernel_object.params_dict[key][var_name] = x_opt[i] - i = i + 1 - - # print ("--------- Finished. ------------") - # print (self.kernel_object.params_dict) - - # disable back_prop - self.back_prop = False - - # refit the model - self.fitted = False - print(self.description()) - self.fit_gp(self.x, self.y) - return True - - def load_params(self, objective_params, params, dims): - self.fig = False - self.back_prop = False - x_opt = [] - counter = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - x_opt.append(objective_params[dims[counter]:dims[counter + 1]]) - counter += 1 - - counter = 0 - for key, dict_params in params.items(): - for var_name, value in dict_params.items(): - self.kernel_object.params_dict[key][var_name] = x_opt[counter] - counter += 1 - - print(self.description()) - - - - def visualize_function(self, xtest, f_trues, filename=None, colors=None, figsize = (15, 7)): - d = xtest.size()[1] - if d == 1: - if isinstance(f_trues, list): - for f_true in f_trues: - plt.plot(xtest, f_true(xtest)) - else: - plt.plot(xtest, f_trues(xtest)) - elif d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=figsize) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - - - if isinstance(f_trues, list): - for index, f_true in enumerate(f_trues): - grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - if colors is not None: - color = colors[index] - ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color) - else: - grid_z = griddata((xx, yy), f_trues(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4) - - if filename is not None: - plt.xticks(fontsize=20, rotation=0) - plt.yticks(fontsize=20, rotation=0) - plt.savefig(filename, dpi=300) - - def visualize_function_contour(self, xtest, f_true, - filename=None, levels=10, figsize=(15, 7), - alpha = 1., 
colorbar = True, cmap = 'hot', - mean_point = None, point_color = 'tab:red', ax = None, - fig = None): - d = xtest.size()[1] - if d == 1: - pass - elif d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - if ax is None: - fig, ax = plt.subplots(figsize=figsize) - - cs = ax.contourf(grid_x, grid_y, grid_z_f, alpha = 0.5, cmap = cmap, linewidths=1, levels = [0,1]) - ax.contour(cs, colors='k', levels = [0.5], alpha = 0.5) - if colorbar: - cbar = fig.colorbar(cs) - # if self.x is not None: - # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") - ax.grid(c='k', ls='-', alpha=0.1) - if mean_point is not None: - plt.plot(mean_point[0],mean_point[1], 'o', ms = 10, color = point_color) - - if filename is not None: - plt.xticks(fontsize=24, rotation=0) - plt.yticks(fontsize=24, rotation=0) - plt.savefig(filename, dpi=300) - return fig, ax - # plt.show() - - def visualize(self, xtest,bounds = False, f_true=None, points=True, show=True, size=2, - norm=1, fig=True, sqrtbeta=2, constrained=None, d=None, - matheron_kernel=None, color = None, label = "", visualize_point = None): - - if not bounds: - [mu, std] = self.mean_std(xtest) - lcb = mu - sqrtbeta *std - ucb = mu + sqrtbeta *std - else: - print ("using bounds") - lcb = self.lcb(xtest) - ucb = self.ucb(xtest) - mu = self.mean(xtest) - - if d is None: - d = self.d - - - - if d == 1: - if fig == True: - plt.figure(figsize=(15, 7)) - plt.clf() - if self.x is not None: - plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'ro', ms=10) - - if visualize_point is not None: - [x, y] = visualize_point - plt.plot(x, y, 'go', ms = 10) - - if size > 0: - - if matheron_kernel is not None: - z = self.sample_matheron(xtest, matheron_kernel, size=size).numpy().T - else: - z = self.sample(xtest, size=size).numpy().T - - for z_arr, label in zip(z, ['sample'] + [None for _ in range(size - 1)]): - plt.plot(xtest.view(-1).numpy(), z_arr, 'k--', lw=2, label=label) - - plt.fill_between(xtest.view(-1).numpy(), lcb.view(-1).numpy(), ucb.view(-1).numpy(), - color="#dddddd") - - if f_true is not None: - plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2, label="truth") - - if color is None: - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - else: - plt.plot(xtest.numpy(), mu.numpy(), linestyle = '-', lw=2, label="posterior mean"+label, color = color) - - plt.legend() - if show == True: - plt.show() - - elif d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15, 7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4, label="mu") - - if f_true is not None: - grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4, label="truth") - - if points == True and self.fitted == True: - ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:, 0].detach().numpy(), - c='r', s=100, marker="o", depthshade=False) - - if hasattr(self,"beta"): - if self.beta 
is not None: - beta = self.beta(norm=norm) - grid_z2 = griddata((xx, yy), (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), - (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2) - grid_z3 = griddata((xx, yy), (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), - (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2) - - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4) - # plt.title('Posterior mean prediction plus 2 st.deviation') - plt.show() - - else: - print("Visualization not implemented") - - def visualize_subopt(self, xtest, f_true=None, points=True, show=True, size=2, norm=1, fig=True, beta=2): - [mu, std] = self.mean_std(xtest) - - print("Visualizing in: ", self.d, "dimensions...") - - if self.d == 1: - if fig == True: - plt.figure(figsize=(15, 7)) - plt.clf() - if self.x is not None: - plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o") - plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, - color="#dddddd") - if f_true is not None: - plt.plot(xtest.numpy(), f_true(xtest).numpy(), 'b-', lw=2, label="truth") - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - - min = torch.max(mu - beta * std) - mask = (mu + beta * std < min) - v = torch.min(mu - beta * std).numpy() - 1 - plt.plot(xtest.numpy()[mask], 0 * xtest.numpy()[mask] + v, 'ko', lw=6, label="Discarted Region") - - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - - if show == True: - plt.show() - - def visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2): - append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice - xtest2 = torch.cat((xtest, append), dim=1) - - [mu, std] = self.mean_std(xtest2) - - plt.figure(figsize=(15, 7)) - plt.clf() - plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample") - print(std.size(), mu.size()) - if self.x is not None: - plt.plot(self.x[:, 0].detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, color="#dddddd") - plt.fill_between(xtest.numpy().flat, (mu + 2 * std).numpy().flat, (mu + 2 * std + 2 * self.s).numpy().flat, - color="#bbdefb") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std - 2 * self.s).numpy().flat, (mu - 2 * std).numpy().flat, - color="#bbdefb") - - if eps is not None: - mask = (beta * std < eps) - v = torch.min(mu - beta * std - 2 * self.s).numpy() - plt.plot(xtest.numpy()[mask], 0 * xtest.numpy()[mask] + v, 'k', lw=6, - label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace") - - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - if show == True: - plt.show() - - def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False): - [mu, _] = self.mean_std(xtest) - - if self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=(15, 7)) - cs = 
ax.contourf(grid_x, grid_y, grid_z_mu) - ax.contour(cs, colors='k') - - ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'ro', ms=10) - cbar = fig.colorbar(cs) - - ax.grid(c='k', ls='-', alpha=0.1) - - if f_true is not None: - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_f) - ax.contour(cs, colors='k') - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - if show == True: - plt.show() - - def visualize_contour(self, xtest, f_true=None, show=True, points=True, ms=5, levels=20): - [mu, _] = self.mean_std(xtest) - - if self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu) - ax.contour(cs, colors='k') - - if points == True: - ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'wo', ms=ms, alpha=0.5) - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - - if f_true is not None: - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels) - ax.contour(cs, colors='k') - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - if show == True: - plt.show() - return ax - - def visualize_quiver(self, xtest, size=2, norm=1): - [mu, std] = self.mean_std(xtest) - if self.d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15, 7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - # - - ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:, 0].detach().numpy(), - c='r', s=100, marker="o", depthshade=False) - - if self.beta is not None: - beta = self.beta(norm=norm) - grid_z2 = griddata((xx, yy), (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), - (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2) - grid_z3 = griddata((xx, yy), (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), - (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2) - - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4) - plt.title('Posterior mean prediction plus 2 st.deviation') - - derivatives = torch.zeros(xtest.size()[0], 2) - for index, point in enumerate(xtest): - derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2)) - print(derivatives[index, :]) - - print(derivatives.size()) - - grid_der_x_mu = griddata((xx, yy), derivatives[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - grid_der_y_mu = griddata((xx, yy), derivatives[:, 1].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu) - - ax.contour(cs, colors='k') - - # Plot grid. 
- ax.grid(c='k', ls='-', alpha=0.1) - ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu) - - plt.show() - - else: - print("Visualization not implemented") + def fit(self): + pass + + @abstractmethod + def ucb(self, x): + pass + + @abstractmethod + def lcb(self, x): + pass + + def load_data(self, d): + self.x = d[0] + self.y = d[1] + + def log_marginal(self, kernel, X, weight): + func = kernel.get_kernel() + K = ( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.s * self.s + ) + L = torch.linalg.cholesky(K) + logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) * weight + alpha = torch.cholesky_solve(self.y, L) + logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet + logprob = -logprob + return logprob + + def optimize_params_general( + self, + params={}, + restarts=2, + optimizer="pymanopt", + maxiter=1000, + mingradnorm=1e-4, + regularizer_func=None, + verbose=False, + scale=1.0, + weight=1.0, + save=False, + save_name="model.np", + parallel=False, + cores=None, + ): + """ + + :param params: + :param restarts: + :param optimizer: + :param maxiter: + :param mingradnorm: + :param regularizer_func: + :param verbose: + :return: + """ + manifolds = [] + bounds = [] + init_values = [] + + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + init_value, manifold, bound = value + manifolds.append(manifold) + bounds.append(bound) + init_values.append(init_value) + + if optimizer == "pymanopt": + + manifold = Product(tuple(manifolds)) + + @pymanopt.function.pytorch(manifold) + def cost(*args): + # print (args) + input_dict = {} + i = 0 + for key, dict_params in params.items(): + small_param = {} + for var_name, value in dict_params.items(): + small_param[var_name] = args[i] + i = i + 1 + input_dict[key] = small_param + + if regularizer_func is not None: + f = self.log_marginal( + self.kernel_object, input_dict, weight + ) + regularizer_func(args) + else: + f = self.log_marginal(self.kernel_object, input_dict, weight) + return f + + problem = pymanopt.Problem(manifold, cost=cost) + solver = SteepestDescent( + verbosity=verbose, max_iterations=maxiter, min_gradient_norm=mingradnorm + ) + + # get initial point + objective_values = [] + objective_params = [] + + for rep in range(restarts): + x_init = [] + for index, man in enumerate(manifolds): + if init_values[index] is None: + x_sub = man.random_point() * scale + else: + x_sub = np.array([init_values[index]]) + x_init.append(x_sub) + # try: + res = solver.run(problem, initial_point=x_init) + + objective_params.append(res.point) + objective_values.append(res.cost) # log['final_values']['f(x)']) + # except Exception as e: + # print (e) + # print ("Optimization restart failed:", x_init) + # pick the smallest objective + best_index = np.argmin(objective_values) + x_opt = [torch.from_numpy(j) for j in objective_params[best_index]] + + elif optimizer == "scipy": + cost_numpy = lambda x: cost(x).detach.numpy() + egrad_numpy = lambda x: egrad(x).detach().numpy() + + elif optimizer == "bisection": + + def cost(x): + input_dict = self.kernel_object.params_dict + counter = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + input_dict[key][var_name] = x + counter += 1 + + if regularizer_func is not None: + f = self.log_marginal( + self.kernel_object, input_dict, weight + ) + regularizer_func(x) + else: + f = self.log_marginal(self.kernel_object, input_dict, weight) + return f + + a, b = bounds[0] + x_opt = [bisection(cost, a, b, 100)] + + elif 
optimizer == "pytorch-minimize": + var_names = [] + dims = [ + 0, + ] + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + init_value, manifold, bound = value + + manifolds.append(manifold) + bounds.append(bound) + init_values.append(init_value) + var_names.append(var_name) + dims.append(manifold.dim) + + dims = np.cumsum(dims).astype(int) + + def cost(x): + input_dict = self.kernel_object.params_dict + counter = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + if key != "likelihood": + input_dict[key][var_name] = x[ + dims[counter] : dims[counter + 1] + ] + else: + self.s = x[dims[counter] : dims[counter + 1]] + counter += 1 + + if regularizer_func is not None: + f = self.log_marginal( + self.kernel_object, input_dict, weight + ) + regularizer_func(x) + else: + f = self.log_marginal(self.kernel_object, input_dict, weight) + return f + + objective_values = [] + objective_params = [] + x_opt = [] + + dim = dims[-1] + self.prepared_log_marginal = False + for rep in range(restarts): + # try: + if init_values[0] is None: + x_init = torch.randn(size=(dim, 1)).double().view(-1) ** 2 * scale + else: + x_init = init_values[0](dim) + + if bounds[0] is None: + res = minimize_torch( + cost, + x_init, + method="l-bfgs", + tol=1e-10, + disp=verbose + 1, + options={"max_iter": maxiter, "gtol": mingradnorm}, + ) + objective_params.append(res.x) + objective_values.append(res.fun) + else: + print("Constrained optimization with bounds", bounds[0]) + res = minimize( + cost, + x_init.numpy(), + backend="torch", + method="L-BFGS-B", + bounds=bounds[0], + precision="float64", + tol=1e-8, + options={ + "ftol": 1e-10, + "gtol": mingradnorm, + "eps": 1e-08, + "maxfun": 15000, + "maxiter": maxiter, + "maxls": 20, + "disp": verbose + 1, + }, + ) + + objective_params.append(torch.from_numpy(res.x)) + objective_values.append(torch.from_numpy(res.fun)) + # except Exception as e: + # print(e) + # save models + + if save: + vals = { + "params": objective_params, + "evidence": objective_values, + "repeats": restarts, + "dim": dims, + "param_names": params, + } + + with open(save_name, "wb") as f: + pickle.dump(vals, f) + + best_index = np.argmin(objective_values) + + counter = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + x_opt.append( + objective_params[best_index][dims[counter] : dims[counter + 1]] + ) + counter += 1 + + elif optimizer == "discrete": + values = [] + configurations = manifolds[0] + for config in manifolds[0]: + values.append(cost(config)) + + best_index = np.argmin(values) + x_opt = [configurations[best_index]] + else: + raise AssertionError("Optimizer not implemented.") + + # put back into default dic + i = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + if key == "likelihood": + self.s = x_opt[i] + + else: + self.kernel_object.params_dict[key][var_name] = x_opt[i] + i = i + 1 + + # print ("--------- Finished. 
------------") + # print (self.kernel_object.params_dict) + + # disable back_prop + self.back_prop = False + + # refit the model + self.fitted = False + print(self.description()) + self.fit_gp(self.x, self.y) + return True + + def load_params(self, objective_params, params, dims): + self.fig = False + self.back_prop = False + x_opt = [] + counter = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + x_opt.append(objective_params[dims[counter] : dims[counter + 1]]) + counter += 1 + + counter = 0 + for key, dict_params in params.items(): + for var_name, value in dict_params.items(): + self.kernel_object.params_dict[key][var_name] = x_opt[counter] + counter += 1 + + print(self.description()) + + def visualize_function( + self, xtest, f_trues, filename=None, colors=None, figsize=(15, 7) + ): + d = xtest.size()[1] + if d == 1: + if isinstance(f_trues, list): + for f_true in f_trues: + plt.plot(xtest, f_true(xtest)) + else: + plt.plot(xtest, f_trues(xtest)) + elif d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=figsize) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + + if isinstance(f_trues, list): + for index, f_true in enumerate(f_trues): + grid_z = griddata( + (xx, yy), + f_true(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + if colors is not None: + color = colors[index] + ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color) + else: + grid_z = griddata( + (xx, yy), + f_trues(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4) + + if filename is not None: + plt.xticks(fontsize=20, rotation=0) + plt.yticks(fontsize=20, rotation=0) + plt.savefig(filename, dpi=300) + + def visualize_function_contour( + self, + xtest, + f_true, + filename=None, + levels=10, + figsize=(15, 7), + alpha=1.0, + colorbar=True, + cmap="hot", + mean_point=None, + point_color="tab:red", + ax=None, + fig=None, + ): + d = xtest.size()[1] + if d == 1: + pass + elif d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + + cs = ax.contourf( + grid_x, + grid_y, + grid_z_f, + alpha=0.5, + cmap=cmap, + linewidths=1, + levels=[0, 1], + ) + ax.contour(cs, colors="k", levels=[0.5], alpha=0.5) + if colorbar: + cbar = fig.colorbar(cs) + # if self.x is not None: + # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") + ax.grid(c="k", ls="-", alpha=0.1) + if mean_point is not None: + plt.plot(mean_point[0], mean_point[1], "o", ms=10, color=point_color) + + if filename is not None: + plt.xticks(fontsize=24, rotation=0) + plt.yticks(fontsize=24, rotation=0) + plt.savefig(filename, dpi=300) + return fig, ax + + # plt.show() + + def visualize( + self, + xtest, + bounds=False, + f_true=None, + points=True, + show=True, + size=2, + norm=1, + fig=True, + sqrtbeta=2, + constrained=None, + d=None, + matheron_kernel=None, + color=None, + label="", + visualize_point=None, + ): + + if not bounds: + [mu, std] = self.mean_std(xtest) + lcb = mu - sqrtbeta * std + ucb = mu + sqrtbeta * std + else: + 
print("using bounds") + lcb = self.lcb(xtest) + ucb = self.ucb(xtest) + mu = self.mean(xtest) + + if d is None: + d = self.d + + if d == 1: + if fig == True: + plt.figure(figsize=(15, 7)) + plt.clf() + if self.x is not None: + plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), "ro", ms=10) + + if visualize_point is not None: + [x, y] = visualize_point + plt.plot(x, y, "go", ms=10) + + if size > 0: + + if matheron_kernel is not None: + z = ( + self.sample_matheron(xtest, matheron_kernel, size=size) + .numpy() + .T + ) + else: + z = self.sample(xtest, size=size).numpy().T + + for z_arr, label in zip( + z, ["sample"] + [None for _ in range(size - 1)] + ): + plt.plot(xtest.view(-1).numpy(), z_arr, "k--", lw=2, label=label) + + plt.fill_between( + xtest.view(-1).numpy(), + lcb.view(-1).numpy(), + ucb.view(-1).numpy(), + color="#dddddd", + ) + + if f_true is not None: + plt.plot( + xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth" + ) + + if color is None: + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + else: + plt.plot( + xtest.numpy(), + mu.numpy(), + linestyle="-", + lw=2, + label="posterior mean" + label, + color=color, + ) + + plt.legend() + if show == True: + plt.show() + + elif d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4, label="mu") + + if f_true is not None: + grid_z = griddata( + (xx, yy), + f_true(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface( + grid_x, grid_y, grid_z, color="b", alpha=0.4, label="truth" + ) + + if points == True and self.fitted == True: + ax.scatter( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + self.y[:, 0].detach().numpy(), + c="r", + s=100, + marker="o", + depthshade=False, + ) + + if hasattr(self, "beta"): + if self.beta is not None: + beta = self.beta(norm=norm) + grid_z2 = griddata( + (xx, yy), + (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2) + grid_z3 = griddata( + (xx, yy), + (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2) + + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4) + # plt.title('Posterior mean prediction plus 2 st.deviation') + plt.show() + + else: + print("Visualization not implemented") + + def visualize_subopt( + self, + xtest, + f_true=None, + points=True, + show=True, + size=2, + norm=1, + fig=True, + beta=2, + ): + [mu, std] = self.mean_std(xtest) + + print("Visualizing in: ", self.d, "dimensions...") + + if self.d == 1: + if fig == True: + plt.figure(figsize=(15, 7)) + plt.clf() + if self.x is not None: + plt.plot( + self.x.detach().numpy(), + self.y.detach().numpy(), + "r+", + ms=10, + marker="o", + ) + plt.plot( + xtest.numpy(), + self.sample(xtest, size=size).numpy(), + "k--", + lw=2, + label="sample", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std).numpy().flat, + (mu + 2 * std).numpy().flat, + color="#dddddd", + ) + if f_true is not None: + plt.plot( + xtest.numpy(), 
f_true(xtest).numpy(), "b-", lw=2, label="truth" + ) + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + + min = torch.max(mu - beta * std) + mask = mu + beta * std < min + v = torch.min(mu - beta * std).numpy() - 1 + plt.plot( + xtest.numpy()[mask], + 0 * xtest.numpy()[mask] + v, + "ko", + lw=6, + label="Discarted Region", + ) + + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.legend() + + if show == True: + plt.show() + + def visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2): + append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice + xtest2 = torch.cat((xtest, append), dim=1) + + [mu, std] = self.mean_std(xtest2) + + plt.figure(figsize=(15, 7)) + plt.clf() + plt.plot( + xtest.numpy(), + self.sample(xtest, size=size).numpy(), + "k--", + lw=2, + label="sample", + ) + print(std.size(), mu.size()) + if self.x is not None: + plt.plot( + self.x[:, 0].detach().numpy(), + self.y.detach().numpy(), + "r+", + ms=10, + marker="o", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std).numpy().flat, + (mu + 2 * std).numpy().flat, + color="#dddddd", + ) + plt.fill_between( + xtest.numpy().flat, + (mu + 2 * std).numpy().flat, + (mu + 2 * std + 2 * self.s).numpy().flat, + color="#bbdefb", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std - 2 * self.s).numpy().flat, + (mu - 2 * std).numpy().flat, + color="#bbdefb", + ) + + if eps is not None: + mask = beta * std < eps + v = torch.min(mu - beta * std - 2 * self.s).numpy() + plt.plot( + xtest.numpy()[mask], + 0 * xtest.numpy()[mask] + v, + "k", + lw=6, + label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace", + ) + + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.legend() + if show == True: + plt.show() + + def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False): + [mu, _] = self.mean_std(xtest) + + if self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + ax.contour(cs, colors="k") + + ax.plot( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + "ro", + ms=10, + ) + cbar = fig.colorbar(cs) + + ax.grid(c="k", ls="-", alpha=0.1) + + if f_true is not None: + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), + f[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_f) + ax.contour(cs, colors="k") + cbar = fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + if show == True: + plt.show() + + def visualize_contour( + self, xtest, f_true=None, show=True, points=True, ms=5, levels=20 + ): + [mu, _] = self.mean_std(xtest) + + if self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + ax.contour(cs, colors="k") + + if points == 
True: + ax.plot( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + "wo", + ms=ms, + alpha=0.5, + ) + cbar = fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + + if f_true is not None: + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), + f[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels) + ax.contour(cs, colors="k") + cbar = fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + if show == True: + plt.show() + return ax + + def visualize_quiver(self, xtest, size=2, norm=1): + [mu, std] = self.mean_std(xtest) + if self.d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + # + + ax.scatter( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + self.y[:, 0].detach().numpy(), + c="r", + s=100, + marker="o", + depthshade=False, + ) + + if self.beta is not None: + beta = self.beta(norm=norm) + grid_z2 = griddata( + (xx, yy), + (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2) + grid_z3 = griddata( + (xx, yy), + (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2) + + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4) + plt.title("Posterior mean prediction plus 2 st.deviation") + + derivatives = torch.zeros(xtest.size()[0], 2) + for index, point in enumerate(xtest): + derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2)) + print(derivatives[index, :]) + + print(derivatives.size()) + + grid_der_x_mu = griddata( + (xx, yy), + derivatives[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + grid_der_y_mu = griddata( + (xx, yy), + derivatives[:, 1].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + + ax.contour(cs, colors="k") + + # Plot grid. 
+ ax.grid(c="k", ls="-", alpha=0.1) + ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu) + + plt.show() + + else: + print("Visualization not implemented") if __name__ == "__main__": - from stpy.continuous_processes.kernelized_features import KernelizedFeatures - from stpy.kernels import KernelFunction - from stpy.embeddings.embedding import HermiteEmbedding - import stpy - import torch - import matplotlib.pyplot as plt - import numpy as np - - n = 1024 - N = 256 - gamma = 0.09 - s = 0.1 - # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) - benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) - - x = benchmark.initial_guess(N, adv_inv=True) - y = benchmark.eval(x) - xtest = benchmark.interval(1024) - - # GP = GaussianProcess(gamma=gamma, s=s) - # GP.fit_gp(x, y) - # GP.visualize(xtest, show=False, size=5) - # plt.show() - - m = 64 - kernel = KernelFunction(gamma=gamma) - embedding = HermiteEmbedding(gamma=gamma, m=m) - RFF = KernelizedFeatures(embedding=embedding, s=s, m=m) - RFF.fit_gp(x, y) - RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel) - plt.show() + from stpy.continuous_processes.kernelized_features import KernelizedFeatures + from stpy.kernels import KernelFunction + from stpy.embeddings.embedding import HermiteEmbedding + import stpy + import torch + import matplotlib.pyplot as plt + import numpy as np + + n = 1024 + N = 256 + gamma = 0.09 + s = 0.1 + # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) + benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) + + x = benchmark.initial_guess(N, adv_inv=True) + y = benchmark.eval(x) + xtest = benchmark.interval(1024) + + # GP = GaussianProcess(gamma=gamma, s=s) + # GP.fit_gp(x, y) + # GP.visualize(xtest, show=False, size=5) + # plt.show() + + m = 64 + kernel = KernelFunction(gamma=gamma) + embedding = HermiteEmbedding(gamma=gamma, m=m) + RFF = KernelizedFeatures(embedding=embedding, s=s, m=m) + RFF.fit_gp(x, y) + RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel) + plt.show() diff --git a/stpy/feature_importance/feature_ranker.py b/stpy/feature_importance/feature_ranker.py index 17ccbe9..42c131a 100644 --- a/stpy/feature_importance/feature_ranker.py +++ b/stpy/feature_importance/feature_ranker.py @@ -4,46 +4,44 @@ from stpy.estimator import Estimator import copy -class FeatureRanker(): - - def __init__(self, - model: Estimator, - mode: str = 'explained variance' - ): - self.model = model - self.mode = mode - - if not hasattr(self.model, "kernel_object"): - print ("Invalid estimator structure to run feature importance analysis") - - def importance(self): - - if self.mode == 'explained variance': - return self.one_off_importance() - elif self.mode == 'cross_validation': - raise NotImplementedError("This is not implemented.") - - def one_off_importance(self): - n,d = self.model.x.size() - x = self.model.x - y = self.model.y - # iterate over features and - importance = torch.zeros(size=(d,1)).double().view(-1) - res_total = torch.sum(self.model.residuals(x, y) ** 2) - - for i in range(d): - # define new data - xnew = x.clone() - xnew[:,i] = 0. 
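# ---------------------------------------------------------------------------
# Annotation (illustrative sketch, not part of the stpy patch): the
# one_off_importance routine in this hunk ranks features by zeroing one input
# column at a time, refitting, and taking the ratio of the baseline residual
# to the knocked-out residual (values near 1 mean the feature barely matters).
# A self-contained numpy version of the same idea, using ordinary least
# squares in place of the stpy estimator; the function name is hypothetical.
import numpy as np

def one_off_importance_sketch(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    def sse(A, b):
        coef, *_ = np.linalg.lstsq(A, b, rcond=None)
        return float(np.sum((A @ coef - b) ** 2))

    res_total = sse(X, y)
    importance = np.zeros(X.shape[1])
    for i in range(X.shape[1]):
        X_drop = X.copy()
        X_drop[:, i] = 0.0  # knock out feature i, as in the patched code
        importance[i] = res_total / sse(X_drop, y)
    return importance
# ---------------------------------------------------------------------------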
- - # define new model - GP = copy.deepcopy(self.model) - GP.fit_gp(xnew,y) - - # evaluate residuals - res = torch.sum(GP.residuals(xnew,y)**2) - - # store - importance[i] = res_total/res - print(i + 1, "/", d,':', res_total/res) - return importance \ No newline at end of file + +class FeatureRanker: + + def __init__(self, model: Estimator, mode: str = "explained variance"): + self.model = model + self.mode = mode + + if not hasattr(self.model, "kernel_object"): + print("Invalid estimator structure to run feature importance analysis") + + def importance(self): + + if self.mode == "explained variance": + return self.one_off_importance() + elif self.mode == "cross_validation": + raise NotImplementedError("This is not implemented.") + + def one_off_importance(self): + n, d = self.model.x.size() + x = self.model.x + y = self.model.y + # iterate over features and + importance = torch.zeros(size=(d, 1)).double().view(-1) + res_total = torch.sum(self.model.residuals(x, y) ** 2) + + for i in range(d): + # define new data + xnew = x.clone() + xnew[:, i] = 0.0 + + # define new model + GP = copy.deepcopy(self.model) + GP.fit_gp(xnew, y) + + # evaluate residuals + res = torch.sum(GP.residuals(xnew, y) ** 2) + + # store + importance[i] = res_total / res + print(i + 1, "/", d, ":", res_total / res) + return importance diff --git a/stpy/generative_models/conditional_generative_model.py b/stpy/generative_models/conditional_generative_model.py index 63b7e75..3d3f3a5 100644 --- a/stpy/generative_models/conditional_generative_model.py +++ b/stpy/generative_models/conditional_generative_model.py @@ -1,5 +1,7 @@ -class GenerativeModel(): +class GenerativeModel: pass + + class ConditionalGenerativeModel(GenerativeModel): x = np.random.randn(10) @@ -9,6 +11,5 @@ class ConditionalGenerativeModel(GenerativeModel): # find the largest element np.max(x) - - pass \ No newline at end of file + pass diff --git a/stpy/generative_models/cvae.py b/stpy/generative_models/cvae.py index d9f38a5..56918cf 100644 --- a/stpy/generative_models/cvae.py +++ b/stpy/generative_models/cvae.py @@ -7,7 +7,7 @@ # cuda setup device = torch.device("cpu") -kwargs = {'num_workers': 1, 'pin_memory': True} +kwargs = {"num_workers": 1, "pin_memory": True} # hyper params batch_size = 64 @@ -16,7 +16,6 @@ epochs = 10 - def one_hot(labels, class_size): targets = torch.zeros(labels.size(0), class_size) for i, label in enumerate(labels): @@ -25,13 +24,13 @@ def one_hot(labels, class_size): class CVAE(nn.Module): - def __init__(self, feature_size, latent_size, ouput_size, midsize = 400): + def __init__(self, feature_size, latent_size, ouput_size, midsize=400): super(CVAE, self).__init__() self.feature_size = feature_size self.class_size = ouput_size # encode - self.fc1 = nn.Linear(feature_size + ouput_size, midsize) + self.fc1 = nn.Linear(feature_size + ouput_size, midsize) self.fc21 = nn.Linear(midsize, latent_size) self.fc22 = nn.Linear(midsize, latent_size) @@ -42,28 +41,28 @@ def __init__(self, feature_size, latent_size, ouput_size, midsize = 400): self.elu = nn.ELU() self.sigmoid = nn.Sigmoid() - def encode(self, x, y): # Q(z|x, c) - ''' + def encode(self, x, y): # Q(z|x, c) + """ x: (bs, feature_size) y: (bs, class_size) - ''' - inputs = torch.cat([x, y], 1) # (bs, feature_size+class_size) + """ + inputs = torch.cat([x, y], 1) # (bs, feature_size+class_size) h1 = self.elu(self.fc1(inputs)) z_mu = self.fc21(h1) z_var = self.fc22(h1) return z_mu, z_var def reparameterize(self, mu, logvar): - std = torch.exp(0.5*logvar) + std = torch.exp(0.5 * 
logvar) eps = torch.randn_like(std) - return mu + eps*std + return mu + eps * std - def decode(self, z, y): # P(x|z, c) - ''' + def decode(self, z, y): # P(x|z, c) + """ z: (bs, latent_size) c: (bs, class_size) - ''' - inputs = torch.cat([z, y], 1) # (bs, latent_size+class_size) + """ + inputs = torch.cat([z, y], 1) # (bs, latent_size+class_size) h3 = self.elu(self.fc3(inputs)) return self.sigmoid(self.fc4(h3)) @@ -72,13 +71,15 @@ def forward(self, x, y): z = self.reparameterize(mu, logvar) return self.decode(z, y), mu, logvar + # create a CVAE model model = CVAE(1, 20, 1).to(device) optimizer = optim.Adam(model.parameters(), lr=1e-3) + # Reconstruction + KL divergence losses summed over all elements and batch def loss_function(recon_x, x, mu, logvar): - BCE = F.binary_cross_entropy(recon_x, x, reduction='sum') + BCE = F.binary_cross_entropy(recon_x, x, reduction="sum") KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) return BCE + KLD @@ -96,13 +97,21 @@ def train(epoch): train_loss += loss.detach().cpu().numpy() optimizer.step() if batch_idx % 20 == 0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. * batch_idx / len(train_loader), - loss.item() / len(data))) - - print('====> Epoch: {} Average loss: {:.4f}'.format( - epoch, train_loss / len(train_loader.dataset))) + print( + "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( + epoch, + batch_idx * len(data), + len(train_loader.dataset), + 100.0 * batch_idx / len(train_loader), + loss.item() / len(data), + ) + ) + + print( + "====> Epoch: {} Average loss: {:.4f}".format( + epoch, train_loss / len(train_loader.dataset) + ) + ) def test(epoch): @@ -113,26 +122,27 @@ def test(epoch): data, labels = data.to(device), labels.to(device) labels = one_hot(labels, 10) recon_batch, mu, logvar = model(data, labels) - test_loss += loss_function(recon_batch, data, mu, logvar).detach().cpu().numpy() + test_loss += ( + loss_function(recon_batch, data, mu, logvar).detach().cpu().numpy() + ) if i == 0: n = min(data.size(0), 5) - comparison = torch.cat([data[:n], - recon_batch.view(-1, 1, 28, 28)[:n]]) - save_image(comparison.cpu(), - 'reconstruction_' + str(epoch) + '.png', nrow=n) + comparison = torch.cat([data[:n], recon_batch.view(-1, 1, 28, 28)[:n]]) + save_image( + comparison.cpu(), "reconstruction_" + str(epoch) + ".png", nrow=n + ) test_loss /= len(test_loader.dataset) - print('====> Test set loss: {:.4f}'.format(test_loss)) + print("====> Test set loss: {:.4f}".format(test_loss)) for epoch in range(1, epochs + 1): - train(epoch) + train(epoch) - test(epoch) + test(epoch) - with torch.no_grad(): - c = torch.eye(10, 10).cuda() - sample = torch.randn(10, 20).to(device) - sample = model.decode(sample, c).cpu() - save_image(sample.view(10, 1, 28, 28), - 'sample_' + str(epoch) + '.png') \ No newline at end of file + with torch.no_grad(): + c = torch.eye(10, 10).cuda() + sample = torch.randn(10, 20).to(device) + sample = model.decode(sample, c).cpu() + save_image(sample.view(10, 1, 28, 28), "sample_" + str(epoch) + ".png") diff --git a/stpy/generative_models/generative_sampler.py b/stpy/generative_models/generative_sampler.py index 3ed89b1..919c5f3 100644 --- a/stpy/generative_models/generative_sampler.py +++ b/stpy/generative_models/generative_sampler.py @@ -1,6 +1,7 @@ import torch -class GenerativeSampler(): + +class GenerativeSampler: def __init__(self): - pass \ No newline at end of file + pass diff --git a/stpy/helpers/ColorDB.py 
b/stpy/helpers/ColorDB.py index d964fce..fd16997 100644 --- a/stpy/helpers/ColorDB.py +++ b/stpy/helpers/ColorDB.py @@ -26,140 +26,139 @@ class BadColor(Exception): - pass + pass DEFAULT_DB = None -SPACE = ' ' -COMMASPACE = ', ' +SPACE = " " +COMMASPACE = ", " # generic class class ColorDB: - def __init__(self, fp): - lineno = 2 - self.__name = fp.name - # Maintain several dictionaries for indexing into the color database. - # Note that while Tk supports RGB intensities of 4, 8, 12, or 16 bits, - # for now we only support 8 bit intensities. At least on OpenWindows, - # all intensities in the /usr/openwin/lib/rgb.txt file are 8-bit - # - # key is (red, green, blue) tuple, value is (name, [aliases]) - self.__byrgb = {} - # key is name, value is (red, green, blue) - self.__byname = {} - # all unique names (non-aliases). built-on demand - self.__allnames = None - for line in fp: - # get this compiled regular expression from derived class - mo = self._re.match(line) - if not mo: - print('Error in', fp.name, ' line', lineno, file=sys.stderr) - lineno += 1 - continue - # extract the red, green, blue, and name - red, green, blue = self._extractrgb(mo) - name = self._extractname(mo) - keyname = name.lower() - # BAW: for now the `name' is just the first named color with the - # rgb values we find. Later, we might want to make the two word - # version the `name', or the CapitalizedVersion, etc. - key = (red, green, blue) - foundname, aliases = self.__byrgb.get(key, (name, [])) - if foundname != name and foundname not in aliases: - aliases.append(name) - self.__byrgb[key] = (foundname, aliases) - # add to byname lookup - self.__byname[keyname] = key - lineno = lineno + 1 - - # override in derived classes - def _extractrgb(self, mo): - return [int(x) for x in mo.group('red', 'green', 'blue')] - - def _extractname(self, mo): - return mo.group('name') - - def filename(self): - return self.__name - - def find_byrgb(self, rgbtuple): - """Return name for rgbtuple""" - try: - return self.__byrgb[rgbtuple] - except KeyError: - raise BadColor(rgbtuple) from None - - def find_byname(self, name): - """Return (red, green, blue) for name""" - name = name.lower() - try: - return self.__byname[name] - except KeyError: - raise BadColor(name) from None - - def nearest(self, red, green, blue): - """Return the name of color nearest (red, green, blue)""" - # BAW: should we use Voronoi diagrams, Delaunay triangulation, or - # octree for speeding up the locating of nearest point? Exhaustive - # search is inefficient, but seems fast enough. - nearest = -1 - nearest_name = '' - for name, aliases in self.__byrgb.values(): - r, g, b = self.__byname[name.lower()] - rdelta = red - r - gdelta = green - g - bdelta = blue - b - distance = rdelta * rdelta + gdelta * gdelta + bdelta * bdelta - if nearest == -1 or distance < nearest: - nearest = distance - nearest_name = name - return nearest_name - - def unique_names(self): - # sorted - if not self.__allnames: - self.__allnames = [] - for name, aliases in self.__byrgb.values(): - self.__allnames.append(name) - self.__allnames.sort(key=str.lower) - return self.__allnames - - def aliases_of(self, red, green, blue): - try: - name, aliases = self.__byrgb[(red, green, blue)] - except KeyError: - raise BadColor((red, green, blue)) from None - return [name] + aliases + def __init__(self, fp): + lineno = 2 + self.__name = fp.name + # Maintain several dictionaries for indexing into the color database. 
+ # Note that while Tk supports RGB intensities of 4, 8, 12, or 16 bits, + # for now we only support 8 bit intensities. At least on OpenWindows, + # all intensities in the /usr/openwin/lib/rgb.txt file are 8-bit + # + # key is (red, green, blue) tuple, value is (name, [aliases]) + self.__byrgb = {} + # key is name, value is (red, green, blue) + self.__byname = {} + # all unique names (non-aliases). built-on demand + self.__allnames = None + for line in fp: + # get this compiled regular expression from derived class + mo = self._re.match(line) + if not mo: + print("Error in", fp.name, " line", lineno, file=sys.stderr) + lineno += 1 + continue + # extract the red, green, blue, and name + red, green, blue = self._extractrgb(mo) + name = self._extractname(mo) + keyname = name.lower() + # BAW: for now the `name' is just the first named color with the + # rgb values we find. Later, we might want to make the two word + # version the `name', or the CapitalizedVersion, etc. + key = (red, green, blue) + foundname, aliases = self.__byrgb.get(key, (name, [])) + if foundname != name and foundname not in aliases: + aliases.append(name) + self.__byrgb[key] = (foundname, aliases) + # add to byname lookup + self.__byname[keyname] = key + lineno = lineno + 1 + + # override in derived classes + def _extractrgb(self, mo): + return [int(x) for x in mo.group("red", "green", "blue")] + + def _extractname(self, mo): + return mo.group("name") + + def filename(self): + return self.__name + + def find_byrgb(self, rgbtuple): + """Return name for rgbtuple""" + try: + return self.__byrgb[rgbtuple] + except KeyError: + raise BadColor(rgbtuple) from None + + def find_byname(self, name): + """Return (red, green, blue) for name""" + name = name.lower() + try: + return self.__byname[name] + except KeyError: + raise BadColor(name) from None + + def nearest(self, red, green, blue): + """Return the name of color nearest (red, green, blue)""" + # BAW: should we use Voronoi diagrams, Delaunay triangulation, or + # octree for speeding up the locating of nearest point? Exhaustive + # search is inefficient, but seems fast enough. 
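# ---------------------------------------------------------------------------
# Annotation (illustrative sketch, not part of the stpy patch): ColorDB.nearest
# in this hunk does an exhaustive scan using squared RGB distance (no square
# root is needed to find the argmin). The same idea over a plain name -> RGB
# dict; the helper name and palette below are hypothetical.
def nearest_color_sketch(palette: dict, red: int, green: int, blue: int) -> str:
    best_name, best_dist = "", float("inf")
    for name, (r, g, b) in palette.items():
        dist = (red - r) ** 2 + (green - g) ** 2 + (blue - b) ** 2
        if dist < best_dist:
            best_name, best_dist = name, dist
    return best_name

# Example: nearest_color_sketch({"navy": (0, 0, 128), "black": (0, 0, 0)}, 1, 1, 128)
# returns "navy".
# ---------------------------------------------------------------------------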
+ nearest = -1 + nearest_name = "" + for name, aliases in self.__byrgb.values(): + r, g, b = self.__byname[name.lower()] + rdelta = red - r + gdelta = green - g + bdelta = blue - b + distance = rdelta * rdelta + gdelta * gdelta + bdelta * bdelta + if nearest == -1 or distance < nearest: + nearest = distance + nearest_name = name + return nearest_name + + def unique_names(self): + # sorted + if not self.__allnames: + self.__allnames = [] + for name, aliases in self.__byrgb.values(): + self.__allnames.append(name) + self.__allnames.sort(key=str.lower) + return self.__allnames + + def aliases_of(self, red, green, blue): + try: + name, aliases = self.__byrgb[(red, green, blue)] + except KeyError: + raise BadColor((red, green, blue)) from None + return [name] + aliases class RGBColorDB(ColorDB): - _re = re.compile( - r'\s*(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P.*)') + _re = re.compile(r"\s*(?P\d+)\s+(?P\d+)\s+(?P\d+)\s+(?P.*)") class HTML40DB(ColorDB): - _re = re.compile(r'(?P\S+)\s+(?P#[0-9a-fA-F]{6})') + _re = re.compile(r"(?P\S+)\s+(?P#[0-9a-fA-F]{6})") - def _extractrgb(self, mo): - return rrggbb_to_triplet(mo.group('hexrgb')) + def _extractrgb(self, mo): + return rrggbb_to_triplet(mo.group("hexrgb")) class LightlinkDB(HTML40DB): - _re = re.compile(r'(?P(.+))\s+(?P#[0-9a-fA-F]{6})') + _re = re.compile(r"(?P(.+))\s+(?P#[0-9a-fA-F]{6})") - def _extractname(self, mo): - return mo.group('name').strip() + def _extractname(self, mo): + return mo.group("name").strip() class WebsafeDB(ColorDB): - _re = re.compile('(?P#[0-9a-fA-F]{6})') + _re = re.compile("(?P#[0-9a-fA-F]{6})") - def _extractrgb(self, mo): - return rrggbb_to_triplet(mo.group('hexrgb')) + def _extractrgb(self, mo): + return rrggbb_to_triplet(mo.group("hexrgb")) - def _extractname(self, mo): - return mo.group('hexrgb').upper() + def _extractname(self, mo): + return mo.group("hexrgb").upper() # format is a tuple (RE, SCANLINES, CLASS) where RE is a compiled regular @@ -167,112 +166,111 @@ def _extractname(self, mo): # the class to instantiate if a match is found FILETYPES = [ - (re.compile('Xorg'), RGBColorDB), - (re.compile('XConsortium'), RGBColorDB), - (re.compile('HTML'), HTML40DB), - (re.compile('lightlink'), LightlinkDB), - (re.compile('Websafe'), WebsafeDB), + (re.compile("Xorg"), RGBColorDB), + (re.compile("XConsortium"), RGBColorDB), + (re.compile("HTML"), HTML40DB), + (re.compile("lightlink"), LightlinkDB), + (re.compile("Websafe"), WebsafeDB), ] def get_colordb(file, filetype=None): - colordb = None - fp = open(file) - try: - line = fp.readline() - if not line: - return None - # try to determine the type of RGB file it is - if filetype is None: - filetypes = FILETYPES - else: - filetypes = [filetype] - for typere, class_ in filetypes: - mo = typere.search(line) - if mo: - break - else: - # no matching type - return None - # we know the type and the class to grok the type, so suck it in - colordb = class_(fp) - finally: - fp.close() - # save a global copy - global DEFAULT_DB - DEFAULT_DB = colordb - return colordb + colordb = None + fp = open(file) + try: + line = fp.readline() + if not line: + return None + # try to determine the type of RGB file it is + if filetype is None: + filetypes = FILETYPES + else: + filetypes = [filetype] + for typere, class_ in filetypes: + mo = typere.search(line) + if mo: + break + else: + # no matching type + return None + # we know the type and the class to grok the type, so suck it in + colordb = class_(fp) + finally: + fp.close() + # save a global copy + global DEFAULT_DB + DEFAULT_DB = colordb + 
return colordb _namedict = {} def rrggbb_to_triplet(color): - """Converts a #rrggbb color to the tuple (red, green, blue).""" - rgbtuple = _namedict.get(color) - if rgbtuple is None: - if color[0] != '#': - raise BadColor(color) - red = color[1:3] - green = color[3:5] - blue = color[5:7] - rgbtuple = int(red, 16), int(green, 16), int(blue, 16) - _namedict[color] = rgbtuple - return rgbtuple + """Converts a #rrggbb color to the tuple (red, green, blue).""" + rgbtuple = _namedict.get(color) + if rgbtuple is None: + if color[0] != "#": + raise BadColor(color) + red = color[1:3] + green = color[3:5] + blue = color[5:7] + rgbtuple = int(red, 16), int(green, 16), int(blue, 16) + _namedict[color] = rgbtuple + return rgbtuple _tripdict = {} def triplet_to_rrggbb(rgbtuple): - """Converts a (red, green, blue) tuple to #rrggbb.""" - global _tripdict - hexname = _tripdict.get(rgbtuple) - if hexname is None: - hexname = '#%02x%02x%02x' % rgbtuple - _tripdict[rgbtuple] = hexname - return hexname + """Converts a (red, green, blue) tuple to #rrggbb.""" + global _tripdict + hexname = _tripdict.get(rgbtuple) + if hexname is None: + hexname = "#%02x%02x%02x" % rgbtuple + _tripdict[rgbtuple] = hexname + return hexname def triplet_to_fractional_rgb(rgbtuple): - return [x / 256 for x in rgbtuple] + return [x / 256 for x in rgbtuple] def triplet_to_brightness(rgbtuple): - # return the brightness (grey level) along the scale 0.0==black to - # 1.0==white - r = 0.299 - g = 0.587 - b = 0.114 - return r * rgbtuple[0] + g * rgbtuple[1] + b * rgbtuple[2] - - -if __name__ == '__main__': - colordb = get_colordb('colors.txt') - if not colordb: - print('No parseable color database found') - sys.exit(1) - # on my system, this color matches exactly - target = 'navy' - red, green, blue = rgbtuple = colordb.find_byname(target) - print(target, ':', red, green, blue, triplet_to_rrggbb(rgbtuple)) - print ("-----") - print (rgbtuple) - name, aliases = colordb.find_byrgb(rgbtuple) - print('name:', name, 'aliases:', COMMASPACE.join(aliases)) - r, g, b = (1, 1, 128) # nearest to navy - r, g, b = (145, 238, 144) # nearest to lightgreen - r, g, b = (255, 251, 250) # snow - print('finding nearest to', target, '...') - import time - - t0 = time.time() - nearest = colordb.nearest(r, g, b) - t1 = time.time() - print('found nearest color', nearest, 'in', t1 - t0, 'seconds') - # dump the database - for n in colordb.unique_names(): - r, g, b = colordb.find_byname(n) - aliases = colordb.aliases_of(r, g, b) - print('%20s: (%3d/%3d/%3d) == %s' % (n, r, g, b, - SPACE.join(aliases[1:]))) \ No newline at end of file + # return the brightness (grey level) along the scale 0.0==black to + # 1.0==white + r = 0.299 + g = 0.587 + b = 0.114 + return r * rgbtuple[0] + g * rgbtuple[1] + b * rgbtuple[2] + + +if __name__ == "__main__": + colordb = get_colordb("colors.txt") + if not colordb: + print("No parseable color database found") + sys.exit(1) + # on my system, this color matches exactly + target = "navy" + red, green, blue = rgbtuple = colordb.find_byname(target) + print(target, ":", red, green, blue, triplet_to_rrggbb(rgbtuple)) + print("-----") + print(rgbtuple) + name, aliases = colordb.find_byrgb(rgbtuple) + print("name:", name, "aliases:", COMMASPACE.join(aliases)) + r, g, b = (1, 1, 128) # nearest to navy + r, g, b = (145, 238, 144) # nearest to lightgreen + r, g, b = (255, 251, 250) # snow + print("finding nearest to", target, "...") + import time + + t0 = time.time() + nearest = colordb.nearest(r, g, b) + t1 = time.time() + print("found nearest 
color", nearest, "in", t1 - t0, "seconds") + # dump the database + for n in colordb.unique_names(): + r, g, b = colordb.find_byname(n) + aliases = colordb.aliases_of(r, g, b) + print("%20s: (%3d/%3d/%3d) == %s" % (n, r, g, b, SPACE.join(aliases[1:]))) diff --git a/stpy/helpers/abitrary_sampling.py b/stpy/helpers/abitrary_sampling.py index 428c03c..55eb46d 100644 --- a/stpy/helpers/abitrary_sampling.py +++ b/stpy/helpers/abitrary_sampling.py @@ -6,207 +6,217 @@ def sample_uniform_sphere(n, d, radius=1): - X = np.random.randn(n, d) - X_n = np.random.randn(n, d) - for i in range(n): - X_n[i, :] = (X[i, :] / np.linalg.norm(X[i, :])) * radius - return X_n + X = np.random.randn(n, d) + X_n = np.random.randn(n, d) + for i in range(n): + X_n[i, :] = (X[i, :] / np.linalg.norm(X[i, :])) * radius + return X_n def rejection_sampling(pdf, size=(1, 1)): - """ - Implements rejection sampling - - :param pdf: - :param size: - :return: - """ - n = size[0] - d = size[1] - output = np.zeros(shape=size) - i = 0 - while i < n: - Z = np.random.normal(size=(1, d)) - u = np.random.uniform() - if pdf(Z) < u: - output[i, :] = Z - i = i + 1 - - return output + """ + Implements rejection sampling + + :param pdf: + :param size: + :return: + """ + n = size[0] + d = size[1] + output = np.zeros(shape=size) + i = 0 + while i < n: + Z = np.random.normal(size=(1, d)) + u = np.random.uniform() + if pdf(Z) < u: + output[i, :] = Z + i = i + 1 + + return output def next_prime(): - def is_prime(num): - "Checks if num is a prime value" - for i in range(2, int(num ** 0.5) + 1): - if (num % i) == 0: return False - return True + def is_prime(num): + "Checks if num is a prime value" + for i in range(2, int(num**0.5) + 1): + if (num % i) == 0: + return False + return True - prime = 3 - while (1): - if is_prime(prime): - yield prime - prime += 2 + prime = 3 + while 1: + if is_prime(prime): + yield prime + prime += 2 def vdc(n, base=2): - vdc, denom = 0, 1 - while n: - denom *= base - n, remainder = divmod(n, base) - vdc += remainder / float(denom) - return vdc + vdc, denom = 0, 1 + while n: + denom *= base + n, remainder = divmod(n, base) + vdc += remainder / float(denom) + return vdc def halton_sequence(size, dim): - seq = [] - primeGen = next_prime() - next(primeGen) - for d in range(dim): - base = next(primeGen) - seq.append([vdc(i, base) for i in range(size)]) - return seq + seq = [] + primeGen = next_prime() + next(primeGen) + for d in range(dim): + base = next(primeGen) + seq.append([vdc(i, base) for i in range(size)]) + return seq def sample_qmc_halton_normal(size=(1, 1)): - Z = np.array(halton_sequence(size[0], size[1])).T - Z[0, :] += 10e-5 - from scipy.stats import norm - Z = norm.ppf(Z) - return Z + Z = np.array(halton_sequence(size[0], size[1])).T + Z[0, :] += 10e-5 + from scipy.stats import norm + + Z = norm.ppf(Z) + return Z def sample_qmc_halton(sampler, size=(1, 1)): - Z = np.array(halton_sequence(size[0], size[1]), dtype=np.float64).T - Z[0, :] += 10e-5 - Z = sampler(Z) - return Z + Z = np.array(halton_sequence(size[0], size[1]), dtype=np.float64).T + Z[0, :] += 10e-5 + Z = sampler(Z) + return Z def sample_bounded(bounds): - d = len(bounds) - x = np.zeros(shape=(d)) - for i in range(d): - x[i] = np.uniform(bounds[i][0], bounds[i][1]) - return x - - -def randomly_split_set_without_duplicates_balanced(x: torch.Tensor, - y: torch.Tensor, - max_bins: int = 2, - alpha: float = 0.2, - size: Union[int, float, None] = None): - # sort tensor - N = x.size()[0] - - out, indices = torch.unique(x, dim=0, return_inverse=True) - n, d 
= out.size() - if size is None: - ntest = int(alpha * n) - else: - ntest = size - y_out = y[np.unique(indices)] - - # bin the data - samples_per_bin, bins, = np.histogram(y_out, bins=max_bins) # Doane's method worked best for me - classes = np.digitize(y_out, bins) - classes[classes == max_bins+1] = max_bins - - # randomly split - s = StratifiedShuffleSplit(n_splits=1, test_size=ntest) - - for _, n_test_indices in s.split(out,classes): - mask_test = torch.zeros(N).bool() - for index in n_test_indices: - mask_test = torch.logical_or(mask_test, indices == index) - - return mask_test, ~mask_test - - -def randomly_split_set_without_duplicates(x: torch.Tensor, - alpha: float = 0.2, - size: Union[int, float, None] = None): - """ - Randomly splits the dataset and returns the mask of the - :param x: - :param alpha: - :return: - """ - - # sort tensor - N = x.size()[0] - - out, indices = torch.unique(x, dim=0, return_inverse=True) - - n, d = out.size() - if size is None: - ntest = int(alpha * n) - else: - ntest = size - - # randomly split - n_test_indices = np.random.choice(np.arange(0, n, 1), size=ntest, replace=False) - mask_test = torch.zeros(N).bool() - - for index in n_test_indices: - mask_test = torch.logical_or(mask_test, indices == index) - - return mask_test, ~mask_test - - -def randomly_split_set_without_duplicates_general(x: torch.Tensor, - sizes: List = [None]): - """ - Randomly splits the dataset and returns the mask of the - :param x: - :param alpha: - :return: - """ - - # sort tensor - N = x.size()[0] - - out, indices = torch.unique(x, dim=0, return_inverse=True) - # is number of unique elements - n, d = out.size() - - # randomly permute indices - inde = torch.from_numpy(np.random.permutation(np.arange(0, n, 1))) - cumsum_indices = torch.cumsum(torch.Tensor(sizes),0).int() - cumsum_indices = torch.cat((torch.Tensor([0]),cumsum_indices)).int() - - masks = [torch.zeros(N).bool() for _ in sizes] - for j in range(len(sizes)): - n_test_indices = inde[cumsum_indices[j]:min(n,cumsum_indices[j+1])] - for index in n_test_indices: - masks[j] = torch.logical_or(masks[j], indices == index) - - return masks + d = len(bounds) + x = np.zeros(shape=(d)) + for i in range(d): + x[i] = np.uniform(bounds[i][0], bounds[i][1]) + return x + + +def randomly_split_set_without_duplicates_balanced( + x: torch.Tensor, + y: torch.Tensor, + max_bins: int = 2, + alpha: float = 0.2, + size: Union[int, float, None] = None, +): + # sort tensor + N = x.size()[0] + + out, indices = torch.unique(x, dim=0, return_inverse=True) + n, d = out.size() + if size is None: + ntest = int(alpha * n) + else: + ntest = size + y_out = y[np.unique(indices)] + + # bin the data + ( + samples_per_bin, + bins, + ) = np.histogram( + y_out, bins=max_bins + ) # Doane's method worked best for me + classes = np.digitize(y_out, bins) + classes[classes == max_bins + 1] = max_bins + + # randomly split + s = StratifiedShuffleSplit(n_splits=1, test_size=ntest) + + for _, n_test_indices in s.split(out, classes): + mask_test = torch.zeros(N).bool() + for index in n_test_indices: + mask_test = torch.logical_or(mask_test, indices == index) + + return mask_test, ~mask_test + + +def randomly_split_set_without_duplicates( + x: torch.Tensor, alpha: float = 0.2, size: Union[int, float, None] = None +): + """ + Randomly splits the dataset and returns the mask of the + :param x: + :param alpha: + :return: + """ + + # sort tensor + N = x.size()[0] + + out, indices = torch.unique(x, dim=0, return_inverse=True) + + n, d = out.size() + if size is None: + ntest = 
int(alpha * n) + else: + ntest = size + + # randomly split + n_test_indices = np.random.choice(np.arange(0, n, 1), size=ntest, replace=False) + mask_test = torch.zeros(N).bool() + + for index in n_test_indices: + mask_test = torch.logical_or(mask_test, indices == index) + + return mask_test, ~mask_test + + +def randomly_split_set_without_duplicates_general( + x: torch.Tensor, sizes: List = [None] +): + """ + Randomly splits the dataset and returns the mask of the + :param x: + :param alpha: + :return: + """ + + # sort tensor + N = x.size()[0] + + out, indices = torch.unique(x, dim=0, return_inverse=True) + # is number of unique elements + n, d = out.size() + + # randomly permute indices + inde = torch.from_numpy(np.random.permutation(np.arange(0, n, 1))) + cumsum_indices = torch.cumsum(torch.Tensor(sizes), 0).int() + cumsum_indices = torch.cat((torch.Tensor([0]), cumsum_indices)).int() + + masks = [torch.zeros(N).bool() for _ in sizes] + for j in range(len(sizes)): + n_test_indices = inde[cumsum_indices[j] : min(n, cumsum_indices[j + 1])] + for index in n_test_indices: + masks[j] = torch.logical_or(masks[j], indices == index) + + return masks # if __name__ == "__main__": - # x = torch.Tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2], - # [3, 2, 2], [2, 1, 1], [4, 2, 1], - # [4, 2, 4], [4,4,4], [1,2,2]]).double() - # - x = torch.randint(0, 10, size = (2000,3)) - y = torch.randn(size = (x.size()[0],1))*10 - - # masks = randomly_split_set_without_duplicates_general(x, sizes=[1,2,3]) - # - # for mask in masks: - # print (mask) - - masks = randomly_split_set_without_duplicates_balanced(x,y, size = 100, max_bins = 10) - masks2 = randomly_split_set_without_duplicates(x, size = 100) - import matplotlib.pyplot as plt - labels = ['test', 'train'] - for index,(mask,mask2) in enumerate(zip(masks,masks2)): - plt.hist(y[mask].T, alpha = 0.2, density= True, label = labels[index]) - plt.hist(y[mask2].T, alpha=0.2, density=True, label=labels[index]+"_random") - plt.legend() - plt.show() - + # x = torch.Tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2], + # [3, 2, 2], [2, 1, 1], [4, 2, 1], + # [4, 2, 4], [4,4,4], [1,2,2]]).double() + # + x = torch.randint(0, 10, size=(2000, 3)) + y = torch.randn(size=(x.size()[0], 1)) * 10 + + # masks = randomly_split_set_without_duplicates_general(x, sizes=[1,2,3]) + # + # for mask in masks: + # print (mask) + + masks = randomly_split_set_without_duplicates_balanced(x, y, size=100, max_bins=10) + masks2 = randomly_split_set_without_duplicates(x, size=100) + import matplotlib.pyplot as plt + + labels = ["test", "train"] + for index, (mask, mask2) in enumerate(zip(masks, masks2)): + plt.hist(y[mask].T, alpha=0.2, density=True, label=labels[index]) + plt.hist(y[mask2].T, alpha=0.2, density=True, label=labels[index] + "_random") + plt.legend() + plt.show() diff --git a/stpy/helpers/coreset_helper.py b/stpy/helpers/coreset_helper.py index 84aaccf..85eae1f 100644 --- a/stpy/helpers/coreset_helper.py +++ b/stpy/helpers/coreset_helper.py @@ -3,26 +3,28 @@ def epsilon_net(borel_set, k): - pass + pass def coreset(borel_set, k): - pass + pass def coreset_leverage_score_greedy(borel_set, kernel, n, tol=10e-4): - xtest = borel_set.return_discretization(n) - k = kernel.kernel - N = xtest.size()[0] - score = 1 - K = k(xtest, xtest) - x = xtest[torch.randint(0, N, (1,)), :].view(1, -1) - c = 1 - while score > tol: - I = torch.eye(c).double() - scores = np.diag(K - k(xtest, x).T @ torch.pinverse(k(x, x) + tol * I) @ k(x, xtest).T) - index = np.argmax(scores) - x = torch.cat((x, xtest[index, :].view(1, -1))) 
- score = scores[index] - c = c + 1 - return x + xtest = borel_set.return_discretization(n) + k = kernel.kernel + N = xtest.size()[0] + score = 1 + K = k(xtest, xtest) + x = xtest[torch.randint(0, N, (1,)), :].view(1, -1) + c = 1 + while score > tol: + I = torch.eye(c).double() + scores = np.diag( + K - k(xtest, x).T @ torch.pinverse(k(x, x) + tol * I) @ k(x, xtest).T + ) + index = np.argmax(scores) + x = torch.cat((x, xtest[index, :].view(1, -1))) + score = scores[index] + c = c + 1 + return x diff --git a/stpy/helpers/ellipsoid_algorithms.py b/stpy/helpers/ellipsoid_algorithms.py index a67ed38..c485943 100644 --- a/stpy/helpers/ellipsoid_algorithms.py +++ b/stpy/helpers/ellipsoid_algorithms.py @@ -7,423 +7,483 @@ def maximum_volume_ellipsoid_l1_polytope_ellipse(ellipse, l1_polytope, verbose=False): - """ - ellipse is - xA_ix + 2b_i x + c_i \leq 0 - - \sum q_i | x^\top a_i - b_i | - - :param ellipse: - :param polytope: - :param verbose: - :return: - """ - - p = ellipse[0].shape[0] - - B = cp.Variable((p, p), PSD=True) - d = cp.Variable((p, 1)) - lam = cp.Variable((1, 1)) - obj_max = cp.Maximize(cp.log_det(B)) - - constraints = [] - A, b, c = ellipse - - eye = np.eye(p) - zeros = np.zeros(shape=(1, p)) - invA = np.linalg.inv(A) - - constraints.append( - cp.bmat([ - [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], - [zeros.T, lam * eye, B], - [d + invA @ b, B, invA]]) >> 0) - - q, X, y, eps = l1_polytope - m = X.shape[0] - t = cp.Variable((m, 1)) - constraints.append(q.T @ t <= eps) - constraints.append(t >= 0.) - for i in range(m): - ai = X[i, :] - bi = y[i] - constraints.append(cp.norm2(B @ ai) + ai.T @ d - bi <= t[i]) - constraints.append(cp.norm2(B @ ai) - ai.T @ d + bi <= t[i]) - - prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.MOSEK, verbose=verbose) - - print(prob.status) - if B.value is not None: - return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value - else: - return None, None - - -def maximum_volume_ellipsoid_relu_polytope_ellipse(ellipse, relu_polytope, verbose=False): - """ - ellipse is - xA_ix + 2b_i x + c_i \leq 0 - - - (eta_i + x^x_i) \leq eps_i - - :param ellipse: - :param polytope: - :param verbose: - :return: - """ - - p = ellipse[0].shape[0] - - B = cp.Variable((p, p), PSD=True) - d = cp.Variable((p, 1)) - lam = cp.Variable((1, 1)) - obj_max = cp.Maximize(cp.log_det(B)) - - constraints = [] - A, b, c = ellipse - - eye = np.eye(p) - zeros = np.zeros(shape=(1, p)) - invA = np.linalg.inv(A) - - constraints.append( - cp.bmat([ - [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], - [zeros.T, lam * eye, B], - [d + invA @ b, B, invA]]) >> 0) - - q, X, y, eps = relu_polytope - m = X.shape[0] - t = cp.Variable((m, 1)) - constraints.append(q.T @ t <= eps) - constraints.append(t >= 0.) 
- for i in range(m): - ai = X[i, :] - bi = y[i] - constraints.append(cp.pos(cp.norm2(B @ ai) + ai.T @ d - bi) <= t[i]) - - prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.MOSEK, verbose=verbose) - - print(prob.status) - if B.value is not None: - return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value - else: - return None, None - - -def maximum_volume_ellipsoid_intersection_ellipsoids(ellipses, planes=None, verbose=False): - """ - Each ellipse is - xA_ix + 2b_i x + c_i \leq 0 - - :param elipses: list of [A,b,c] - - :return:elipse ||x-v||_B^2 < 1 - """ - - p = ellipses[0][0].shape[0] - m = len(ellipses) - - B = cp.Variable((p, p), PSD=True) - d = cp.Variable((p, 1)) - lam = cp.Variable((m, 1)) - - obj_max = cp.Maximize(cp.log_det(B)) - - constraints = [] - for index, ellipse in enumerate(ellipses): - A, b, c = ellipse - - eye = np.eye(p) - zeros = np.zeros(shape=(1, p)) - invA = np.linalg.inv(A) - - constraints.append( - cp.bmat([ - [-lam[index, 0] - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], - [zeros.T, lam[index, 0] * eye, B], - [d + invA @ b, B, invA]]) >> 0) - - if planes is not None: - for index, plane in enumerate(planes): - a, b = plane - constraints.append(cp.norm2(B @ a) + a.T @ d <= b) - - prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.MOSEK, verbose=verbose) - - print(prob.status) - if B.value is not None: - return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value - else: - return None, None + """ + ellipse is + xA_ix + 2b_i x + c_i \leq 0 + + \sum q_i | x^\top a_i - b_i | + + :param ellipse: + :param polytope: + :param verbose: + :return: + """ + + p = ellipse[0].shape[0] + + B = cp.Variable((p, p), PSD=True) + d = cp.Variable((p, 1)) + lam = cp.Variable((1, 1)) + obj_max = cp.Maximize(cp.log_det(B)) + + constraints = [] + A, b, c = ellipse + + eye = np.eye(p) + zeros = np.zeros(shape=(1, p)) + invA = np.linalg.inv(A) + + constraints.append( + cp.bmat( + [ + [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], + [zeros.T, lam * eye, B], + [d + invA @ b, B, invA], + ] + ) + >> 0 + ) + + q, X, y, eps = l1_polytope + m = X.shape[0] + t = cp.Variable((m, 1)) + constraints.append(q.T @ t <= eps) + constraints.append(t >= 0.0) + for i in range(m): + ai = X[i, :] + bi = y[i] + constraints.append(cp.norm2(B @ ai) + ai.T @ d - bi <= t[i]) + constraints.append(cp.norm2(B @ ai) - ai.T @ d + bi <= t[i]) + + prob = cp.Problem(obj_max, constraints) + prob.solve(solver=cp.MOSEK, verbose=verbose) + + print(prob.status) + if B.value is not None: + return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value + else: + return None, None + + +def maximum_volume_ellipsoid_relu_polytope_ellipse( + ellipse, relu_polytope, verbose=False +): + """ + ellipse is + xA_ix + 2b_i x + c_i \leq 0 + + + (eta_i + x^x_i) \leq eps_i + + :param ellipse: + :param polytope: + :param verbose: + :return: + """ + + p = ellipse[0].shape[0] + + B = cp.Variable((p, p), PSD=True) + d = cp.Variable((p, 1)) + lam = cp.Variable((1, 1)) + obj_max = cp.Maximize(cp.log_det(B)) + + constraints = [] + A, b, c = ellipse + + eye = np.eye(p) + zeros = np.zeros(shape=(1, p)) + invA = np.linalg.inv(A) + + constraints.append( + cp.bmat( + [ + [-lam - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], + [zeros.T, lam * eye, B], + [d + invA @ b, B, invA], + ] + ) + >> 0 + ) + + q, X, y, eps = relu_polytope + m = X.shape[0] + t = cp.Variable((m, 1)) + constraints.append(q.T @ t <= eps) + constraints.append(t >= 0.0) + for i in range(m): + ai = X[i, :] + bi = y[i] + 
constraints.append(cp.pos(cp.norm2(B @ ai) + ai.T @ d - bi) <= t[i]) + + prob = cp.Problem(obj_max, constraints) + prob.solve(solver=cp.MOSEK, verbose=verbose) + + print(prob.status) + if B.value is not None: + return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value + else: + return None, None + + +def maximum_volume_ellipsoid_intersection_ellipsoids( + ellipses, planes=None, verbose=False +): + """ + Each ellipse is + xA_ix + 2b_i x + c_i \leq 0 + + :param elipses: list of [A,b,c] + + :return:elipse ||x-v||_B^2 < 1 + """ + + p = ellipses[0][0].shape[0] + m = len(ellipses) + + B = cp.Variable((p, p), PSD=True) + d = cp.Variable((p, 1)) + lam = cp.Variable((m, 1)) + + obj_max = cp.Maximize(cp.log_det(B)) + + constraints = [] + for index, ellipse in enumerate(ellipses): + A, b, c = ellipse + + eye = np.eye(p) + zeros = np.zeros(shape=(1, p)) + invA = np.linalg.inv(A) + + constraints.append( + cp.bmat( + [ + [-lam[index, 0] - c + b.T @ invA @ b, zeros, d.T + b.T @ invA.T], + [zeros.T, lam[index, 0] * eye, B], + [d + invA @ b, B, invA], + ] + ) + >> 0 + ) + + if planes is not None: + for index, plane in enumerate(planes): + a, b = plane + constraints.append(cp.norm2(B @ a) + a.T @ d <= b) + + prob = cp.Problem(obj_max, constraints) + prob.solve(solver=cp.MOSEK, verbose=verbose) + + print(prob.status) + if B.value is not None: + return np.linalg.inv(B.value).T @ np.linalg.inv(B.value), d.value + else: + return None, None # return B.value, -d.value + def ellipsoid_cut(c, B, a, beta): - """ - :param c: elipsoid center - :param B: elipsoid covariance - :param a: a - :param beta: - - (x-c)^\top B^{-1} (x-c) \leq 1 - a^x \leq \beta - - :return: - """ - N = a.T @ B @ a - print(N) - alpha = (a.T @ c - beta) / np.sqrt(N) - if alpha > 0: - d = c.shape[0] - tau = (1 + d * alpha) / (d + 1) - delta = ((d ** 2) / (d ** 2 - 1)) * (1 - alpha ** 2) - sigma = (2. * (1 + d * alpha)) / ((d + 1) * (1 + alpha)) - - s = B @ a - c = c + tau * (s / np.sqrt(N)) - B = delta * (B - sigma * (s @ s.T) / (N)) - return (c, B) + """ + :param c: elipsoid center + :param B: elipsoid covariance + :param a: a + :param beta: + + (x-c)^\top B^{-1} (x-c) \leq 1 + a^x \leq \beta + + :return: + """ + N = a.T @ B @ a + print(N) + alpha = (a.T @ c - beta) / np.sqrt(N) + if alpha > 0: + d = c.shape[0] + tau = (1 + d * alpha) / (d + 1) + delta = ((d**2) / (d**2 - 1)) * (1 - alpha**2) + sigma = (2.0 * (1 + d * alpha)) / ((d + 1) * (1 + alpha)) + + s = B @ a + c = c + tau * (s / np.sqrt(N)) + B = delta * (B - sigma * (s @ s.T) / (N)) + return (c, B) def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u): - """ - solves the problem - min x^\top \theta - s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c - l \leq Lambda \theta \leq u - """ - - m = x.shape[0] - zero = np.zeros(m) - theta = cp.Variable(m) - obj_max = cp.Maximize(x @ theta) - Sigma_sqrt = np.linalg.cholesky(Sigma) - constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.SCS, verbose=True) - val = prob.value - theta = theta.value - return val, theta + """ + solves the problem + min x^\top \theta + s.t. 
(\theta - \mu)Sigma(\theta - \mu) \leq c + l \leq Lambda \theta \leq u + """ + + m = x.shape[0] + zero = np.zeros(m) + theta = cp.Variable(m) + obj_max = cp.Maximize(x @ theta) + Sigma_sqrt = np.linalg.cholesky(Sigma) + constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + prob = cp.Problem(obj_max, constraints) + prob.solve(solver=cp.SCS, verbose=True) + val = prob.value + theta = theta.value + return val, theta def maximize_matrix_quadratic_on_ellipse(X, Sigma, mu, c, threads=4): - """ - solves the problem - max \theta ^top Z \theta - s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c - """ - a = -X @ mu.reshape(-1) - val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads) - val = -val + mu @ X @ mu - return val, theta + """ + solves the problem + max \theta ^top Z \theta + s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c + """ + a = -X @ mu.reshape(-1) + val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads) + val = -val + mu @ X @ mu + return val, theta def minimize_matrix_quadratic_on_ellipse(Z, Sigma, mu, c, threads=4): - """ - solves the problem - min \theta ^top Z \theta - s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c - """ - - m = Z.shape[0] - zero = np.zeros(m) - Sigma_sqrt = np.linalg.cholesky(Sigma) - theta = cp.Variable(m) - obj = cp.Minimize(cp.quad_form(theta, Z)) - constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] - prob = cp.Problem(obj, constraints) - prob.solve(solver=cp.MOSEK, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.iparam.num_threads: threads}) - val = prob.value - theta = theta.value - return val, theta + """ + solves the problem + min \theta ^top Z \theta + s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c + """ + + m = Z.shape[0] + zero = np.zeros(m) + Sigma_sqrt = np.linalg.cholesky(Sigma) + theta = cp.Variable(m) + obj = cp.Minimize(cp.quad_form(theta, Z)) + constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] + prob = cp.Problem(obj, constraints) + prob.solve( + solver=cp.MOSEK, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.iparam.num_threads: threads, + }, + ) + val = prob.value + theta = theta.value + return val, theta def maximize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4): - """ - solves the problem - max (x^\top \theta)^2 - s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c - """ - X = x.reshape(-1, 1) @ x.reshape(1, -1) - a = -X @ mu.reshape(-1) - val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads) - val = -val + mu @ X @ mu - return val, theta + """ + solves the problem + max (x^\top \theta)^2 + s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c + """ + X = x.reshape(-1, 1) @ x.reshape(1, -1) + a = -X @ mu.reshape(-1) + val, theta = QCQP_problem(-X, a, c, Sigma=Sigma, threads=threads) + val = -val + mu @ X @ mu + return val, theta def minimize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4): - """ - solves the problem - min (x^\top \theta)^2 - s.t. 
(\theta - \mu)Sigma(\theta - \mu) \leq c - """ - - m = x.shape[0] - zero = np.zeros(m) - Sigma_sqrt = np.linalg.cholesky(Sigma) - theta = cp.Variable(m) - obj = cp.Minimize((x @ theta) ** 2) - constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] - prob = cp.Problem(obj, constraints) - prob.solve(solver=cp.MOSEK, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.iparam.num_threads: threads}) - val = prob.value - theta = theta.value - return val, theta + """ + solves the problem + min (x^\top \theta)^2 + s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c + """ + + m = x.shape[0] + zero = np.zeros(m) + Sigma_sqrt = np.linalg.cholesky(Sigma) + theta = cp.Variable(m) + obj = cp.Minimize((x @ theta) ** 2) + constraints = [cp.SOC(zero.T @ theta + c, Sigma_sqrt @ (theta - mu))] + prob = cp.Problem(obj, constraints) + prob.solve( + solver=cp.MOSEK, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.iparam.num_threads: threads, + }, + ) + val = prob.value + theta = theta.value + return val, theta def KY_initialization(X): - (n, d) = X.shape - y = np.zeros(shape=(d, d,)) - zs = [] - c = np.random.randn(d) - for j in range(d): - id_max = np.argmax(X @ c) - id_min = np.argmin(X @ c) - - z_max = X[np.argmax(X @ c), :] - z_min = X[np.argmin(X @ c), :] - - zs = zs + [id_max, id_min] - y[j, :] = z_max - z_min - - c = np.random.randn(d) - for i in range(j): - c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :] - - mu = np.zeros(shape=(n)) - mu[zs] = 1. - mu = mu / np.sum(mu) - return mu + (n, d) = X.shape + y = np.zeros( + shape=( + d, + d, + ) + ) + zs = [] + c = np.random.randn(d) + for j in range(d): + id_max = np.argmax(X @ c) + id_min = np.argmin(X @ c) + + z_max = X[np.argmax(X @ c), :] + z_min = X[np.argmin(X @ c), :] + + zs = zs + [id_max, id_min] + y[j, :] = z_max - z_min + + c = np.random.randn(d) + for i in range(j): + c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :] + + mu = np.zeros(shape=(n)) + mu[zs] = 1.0 + mu = mu / np.sum(mu) + return mu def KY_initialization_modified(X): - (n, d) = X.shape - y = np.zeros(shape=(d, d,)) - zs = [] - c = np.random.randn(d) - for j in range(d): - id_max = np.argmax(X @ c) - id_min = np.argmin(X @ c) - - z_max = X[np.argmax(X @ c), :] - z_min = X[np.argmin(X @ c), :] - - zs = zs + [id_max] - y[j, :] = z_max - z_min - - c = np.random.randn(d) - for i in range(j): - c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :] - - mu = np.zeros(shape=(n)) - mu[zs] = 1. - mu = mu / np.sum(mu) - return mu - - -def plot_ellipse(offset, cov, scale=1, theta_num=1000, axis=None, plot_kwargs=None, fill=False, fill_kwargs=None, - color='r'): - ''' - offset = 2d array which gives center of ellipse - cov = covariance of ellipse - scale = scale ellipse by constant factor - theta_num = used for a linspace below, not sure exactly (?) 
- - ''' - # Get Ellipse Properties from cov matrix - - eig_vec, eig_val, u = np.linalg.svd(cov) - # Make sure 0th eigenvector has positive x-coordinate - if eig_vec[0][0] < 0: - eig_vec[0] *= -1 - semimaj = np.sqrt(eig_val[0]) - semimin = np.sqrt(eig_val[1]) - semimaj *= scale - semimin *= scale - phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0]))) - if eig_vec[0][1] < 0 and phi > 0: - phi *= -1 - - # Generate data for ellipse structure - theta = np.linspace(0, 2 * np.pi, theta_num) - r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2) - x = r * np.cos(theta) - y = r * np.sin(theta) - data = np.array([x, y]) - S = np.array([[semimaj, 0], [0, semimin]]) - R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]]) - T = np.dot(R, S) - data = np.dot(T, data) - data[0] += offset[0] - data[1] += offset[1] - - # Plot! - return_fig = False - if axis is None: - axis = plt.gca() - - if plot_kwargs is None: - p, = axis.plot(data[0], data[1], color=color, linestyle='-') - else: - p, = axis.plot(data[0], data[1], **plot_kwargs) - - if fill == True: - if fill_kwargs is None: - fill_kwargs = dict() - axis.fill(data[0], data[1], alpha=0.2, color=color) + (n, d) = X.shape + y = np.zeros( + shape=( + d, + d, + ) + ) + zs = [] + c = np.random.randn(d) + for j in range(d): + id_max = np.argmax(X @ c) + id_min = np.argmin(X @ c) + + z_max = X[np.argmax(X @ c), :] + z_min = X[np.argmin(X @ c), :] + + zs = zs + [id_max] + y[j, :] = z_max - z_min + + c = np.random.randn(d) + for i in range(j): + c = c - ((np.dot(c, y[i, :])) / (np.dot(y[i, :], y[i, :]))) * y[i, :] + + mu = np.zeros(shape=(n)) + mu[zs] = 1.0 + mu = mu / np.sum(mu) + return mu + + +def plot_ellipse( + offset, + cov, + scale=1, + theta_num=1000, + axis=None, + plot_kwargs=None, + fill=False, + fill_kwargs=None, + color="r", +): + """ + offset = 2d array which gives center of ellipse + cov = covariance of ellipse + scale = scale ellipse by constant factor + theta_num = used for a linspace below, not sure exactly (?) + + """ + # Get Ellipse Properties from cov matrix + + eig_vec, eig_val, u = np.linalg.svd(cov) + # Make sure 0th eigenvector has positive x-coordinate + if eig_vec[0][0] < 0: + eig_vec[0] *= -1 + semimaj = np.sqrt(eig_val[0]) + semimin = np.sqrt(eig_val[1]) + semimaj *= scale + semimin *= scale + phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0]))) + if eig_vec[0][1] < 0 and phi > 0: + phi *= -1 + + # Generate data for ellipse structure + theta = np.linspace(0, 2 * np.pi, theta_num) + r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2) + x = r * np.cos(theta) + y = r * np.sin(theta) + data = np.array([x, y]) + S = np.array([[semimaj, 0], [0, semimin]]) + R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]]) + T = np.dot(R, S) + data = np.dot(T, data) + data[0] += offset[0] + data[1] += offset[1] + + # Plot! 
+ return_fig = False + if axis is None: + axis = plt.gca() + + if plot_kwargs is None: + (p,) = axis.plot(data[0], data[1], color=color, linestyle="-") + else: + (p,) = axis.plot(data[0], data[1], **plot_kwargs) + + if fill == True: + if fill_kwargs is None: + fill_kwargs = dict() + axis.fill(data[0], data[1], alpha=0.2, color=color) if __name__ == "__main__": - d = 2 + d = 2 - s1 = 1 - s2 = 1 + s1 = 1 + s2 = 1 - A1 = np.random.randn(d, d) - A1 = A1.T @ A1 + A1 = np.random.randn(d, d) + A1 = A1.T @ A1 - A2 = np.random.randn(d, d) - A2 = A2.T @ A2 + A2 = np.random.randn(d, d) + A2 = A2.T @ A2 - center1 = np.zeros((d, 1)) - center2 = np.ones((d, 1)) + center1 = np.zeros((d, 1)) + center2 = np.ones((d, 1)) - b1 = - A1 @ center1 - b2 = - A2 @ center2 + b1 = -A1 @ center1 + b2 = -A2 @ center2 - c1 = -s1 + center1.T @ A1 @ center1 - c2 = -s2 + center2.T @ A2 @ center2 + c1 = -s1 + center1.T @ A1 @ center1 + c2 = -s2 + center2.T @ A2 @ center2 - # ellipsoids = [[A1,b1,c1],[A2,b2,c2]] - ellipsoids = [[A2, b2, c2]] - planes = [[center2, np.array([[0.]])]] + # ellipsoids = [[A1,b1,c1],[A2,b2,c2]] + ellipsoids = [[A2, b2, c2]] + planes = [[center2, np.array([[0.0]])]] - A, b = maximum_volume_ellipsoid_intersection_ellipsoids(ellipsoids, planes=planes) - # c = 1 + A, b = maximum_volume_ellipsoid_intersection_ellipsoids(ellipsoids, planes=planes) + # c = 1 - axis = plt.gca() + axis = plt.gca() - ## the cov is - # (x-center)cov^{-1}(x-center) - # plot_ellipse(np.array([0.,0.]), cov=np.array([[2,0.],[0.0,2.]]), scale = 1., axis=axis, fill=True, color = 'purple') + ## the cov is + # (x-center)cov^{-1}(x-center) + # plot_ellipse(np.array([0.,0.]), cov=np.array([[2,0.],[0.0,2.]]), scale = 1., axis=axis, fill=True, color = 'purple') - plot_ellipse(center1.reshape(-1), cov=np.linalg.inv(A1), scale=1., axis=axis, fill=True) - plot_ellipse(center2.reshape(-1), cov=np.linalg.inv(A2), scale=1., axis=axis, fill=True, color='b') + plot_ellipse( + center1.reshape(-1), cov=np.linalg.inv(A1), scale=1.0, axis=axis, fill=True + ) + plot_ellipse( + center2.reshape(-1), + cov=np.linalg.inv(A2), + scale=1.0, + axis=axis, + fill=True, + color="b", + ) - plot_ellipse(b.reshape(-1), cov=np.linalg.inv(A), scale=1., axis=axis, fill=True, color='g') + plot_ellipse( + b.reshape(-1), cov=np.linalg.inv(A), scale=1.0, axis=axis, fill=True, color="g" + ) - plt.xlim([-4, 4]) - plt.ylim([-4, 4]) - plt.show() + plt.xlim([-4, 4]) + plt.ylim([-4, 4]) + plt.show() diff --git a/stpy/helpers/haarfisz_transform.py b/stpy/helpers/haarfisz_transform.py index 0c975d6..3c95a8e 100644 --- a/stpy/helpers/haarfisz_transform.py +++ b/stpy/helpers/haarfisz_transform.py @@ -3,98 +3,103 @@ """ + import numpy as np def haar_fisz_transform(data): - a = 2. 
- n = data.shape[0] - nhalf = n // 2 + a = 2.0 + n = data.shape[0] + nhalf = n // 2 - J = np.log2(n) - res = data.copy() - sm = np.zeros(shape=nhalf, dtype=float) - det = sm.copy() + J = np.log2(n) + res = data.copy() + sm = np.zeros(shape=nhalf, dtype=float) + det = sm.copy() - for i in np.arange(0, J, 1): - indices = np.arange(0, nhalf, 1) + for i in np.arange(0, J, 1): + indices = np.arange(0, nhalf, 1) - sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a - det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a + sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a + det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a - det[sm > 0] = det[sm > 0] / np.sqrt(sm[sm > 0]) + det[sm > 0] = det[sm > 0] / np.sqrt(sm[sm > 0]) - res[0:nhalf] = sm[0:nhalf] - res[nhalf:n] = det[0:nhalf] + res[0:nhalf] = sm[0:nhalf] + res[nhalf:n] = det[0:nhalf] - n = n // 2 - nhalf = nhalf // 2 - sm = np.zeros(shape=nhalf) - det = sm.copy() + n = n // 2 + nhalf = nhalf // 2 + sm = np.zeros(shape=nhalf) + det = sm.copy() - nhalf = 1 - n = 2 - sm = np.zeros(shape=nhalf) - det = sm.copy() - for i in np.arange(0, J, 1): - indices = np.arange(0, nhalf, 1) - sm[indices] = res[indices] - det[indices] = res[nhalf:n] - res[2 * indices] = a / 2. * (sm[indices] + det[indices]) - res[2 * indices + 1] = a / 2. * (sm[indices] - det[indices]) + nhalf = 1 + n = 2 + sm = np.zeros(shape=nhalf) + det = sm.copy() + for i in np.arange(0, J, 1): + indices = np.arange(0, nhalf, 1) + sm[indices] = res[indices] + det[indices] = res[nhalf:n] + res[2 * indices] = a / 2.0 * (sm[indices] + det[indices]) + res[2 * indices + 1] = a / 2.0 * (sm[indices] - det[indices]) - n = 2 * n - nhalf = 2 * nhalf + n = 2 * n + nhalf = 2 * nhalf - sm = np.zeros(shape=nhalf) - det = sm.copy() - return res + sm = np.zeros(shape=nhalf) + det = sm.copy() + return res def inverse_haar_fisz_transform(data): - a = 2. - n = data.shape[0] - nhalf = n // 2 - J = np.log2(n) - res = data.copy() - sm = np.zeros(shape=nhalf) - det = sm.copy() - - for i in np.arange(0, J, 1): - indices = np.arange(0, nhalf, 1) - - sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a - det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a - res[0:nhalf] = sm[0:nhalf] - res[(nhalf):n] = det[0:nhalf] - n = n // 2 - nhalf = nhalf // 2 - - nhalf = 1 - n = 2 - - for i in np.arange(0, J, 1): - sm[0:nhalf] = res[0:nhalf] - det[0:nhalf] = res[nhalf:n] - indices = np.arange(0, nhalf, 1) - - res[2 * indices] = (a / 2.) * (sm[0:nhalf] + det[0:nhalf] * np.sqrt(sm[0:nhalf])) - res[2 * indices + 1] = (a / 2.) * (sm[0:nhalf] - det[0:nhalf] * np.sqrt(sm[0:nhalf])) - res[res < 0.] = 0. 
- n = 2 * n - nhalf = 2 * nhalf - return res + a = 2.0 + n = data.shape[0] + nhalf = n // 2 + J = np.log2(n) + res = data.copy() + sm = np.zeros(shape=nhalf) + det = sm.copy() + + for i in np.arange(0, J, 1): + indices = np.arange(0, nhalf, 1) + + sm[0:nhalf] = (res[2 * indices] + res[2 * indices + 1]) / a + det[0:nhalf] = (res[2 * indices] - res[2 * indices + 1]) / a + res[0:nhalf] = sm[0:nhalf] + res[(nhalf):n] = det[0:nhalf] + n = n // 2 + nhalf = nhalf // 2 + + nhalf = 1 + n = 2 + + for i in np.arange(0, J, 1): + sm[0:nhalf] = res[0:nhalf] + det[0:nhalf] = res[nhalf:n] + indices = np.arange(0, nhalf, 1) + + res[2 * indices] = (a / 2.0) * ( + sm[0:nhalf] + det[0:nhalf] * np.sqrt(sm[0:nhalf]) + ) + res[2 * indices + 1] = (a / 2.0) * ( + sm[0:nhalf] - det[0:nhalf] * np.sqrt(sm[0:nhalf]) + ) + res[res < 0.0] = 0.0 + n = 2 * n + nhalf = 2 * nhalf + return res if __name__ == "__main__": - import matplotlib.pyplot as plt - - s = np.random.poisson(5, 4) * 0 + 1 - s2 = np.random.poisson(20, 4) * 0 + 3 - s = np.concatenate((s, s2)).astype(float) - plt.plot(s) - v = haar_fisz_transform(s) - s_inv = inverse_haar_fisz_transform(v) - plt.plot(v) - plt.plot(s_inv, '--') - plt.show() + import matplotlib.pyplot as plt + + s = np.random.poisson(5, 4) * 0 + 1 + s2 = np.random.poisson(20, 4) * 0 + 3 + s = np.concatenate((s, s2)).astype(float) + plt.plot(s) + v = haar_fisz_transform(s) + s_inv = inverse_haar_fisz_transform(v) + plt.plot(v) + plt.plot(s_inv, "--") + plt.show() diff --git a/stpy/helpers/helper.py b/stpy/helpers/helper.py index 26591ed..67a9e63 100755 --- a/stpy/helpers/helper.py +++ b/stpy/helpers/helper.py @@ -8,531 +8,564 @@ from torch.autograd.functional import jacobian -def isin(element, test_elements, assume_unique=False, atol = 1e-10): - (n, d) = element.shape - (m, d) = test_elements.shape - maskFull = np.full((n), False, dtype=bool) - for j in range(m): - mask = np.full((n), True, dtype=bool) - for i in range(d): - # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique)) - mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=atol)) - # print (j, i, mask) - maskFull = np.logical_or(mask, maskFull) - # print (maskFull) - return maskFull - - - -def cartesian(arrays, out=None, dtype = None): - """ - Generate a cartesian product of input arrays. - - Parameters - ---------- - arrays : list of array-like - 1-D arrays to form the cartesian product of. - out : ndarray - Array to place the cartesian product in. - - Returns - ------- - out : ndarray - 2-D array of shape (M, len(arrays)) containing cartesian products - formed of input arrays. 
- - """ - arrays = [np.asarray(x) for x in arrays] - if dtype is None: - dtype = arrays[0].dtype - n = np.prod([x.size for x in arrays]) - if out is None: - out = np.zeros([n, len(arrays)], dtype=dtype) - - m = n / arrays[0].size - m = int(m) - out[:, 0] = np.repeat(arrays[0], m) - if arrays[1:]: - cartesian(arrays[1:], out=out[0:m, 1:]) - for j in range(1, arrays[0].size): - out[j * m:(j + 1) * m, 1:] = out[0:m, 1:] - return out - - -def estimate_std(x: torch.Tensor, # x values used for uniqueness detection - y: torch.Tensor, # y values - truncation:Union[float,None] = None, # truncate at specific y - verbose:bool = False, # verbosity level - conservative:bool = False, - return_all_residuals:bool = False # return - ): # - - out, indices, counts = torch.unique(x, dim=0, return_inverse=True, return_counts=True) - residuals_mean_list = [] - - for i in range(counts.size()[0]): - if counts[i] > 1: - mask = indices == i - mean = torch.mean(y[mask].view(-1)) - residuals_mean_list.append(y[mask].view(-1)-mean.view(-1)) - residuals_mean = torch.hstack(residuals_mean_list) - - if verbose: - print ("Estimating variance from:",residuals_mean.size()) - - if truncation is not None: - residuals_mean_trunc = residuals_mean[torch.abs(residuals_mean) 1: + mask = indices == i + mean = torch.mean(y[mask].view(-1)) + residuals_mean_list.append(y[mask].view(-1) - mean.view(-1)) + residuals_mean = torch.hstack(residuals_mean_list) + + if verbose: + print("Estimating variance from:", residuals_mean.size()) + + if truncation is not None: + residuals_mean_trunc = residuals_mean[torch.abs(residuals_mean) < truncation] + sigma_std = torch.std(residuals_mean_trunc) + else: + sigma_std = torch.std(residuals_mean) + + if return_all_residuals: + return residuals_mean_list, out, counts, residuals_mean, indices + else: + return sigma_std def direct_sum(arrays): - dim = np.sum([array.shape[1] for array in arrays]) - size = np.sum([array.shape[0] for array in arrays]) + dim = np.sum([array.shape[1] for array in arrays]) + size = np.sum([array.shape[0] for array in arrays]) - out = np.zeros(shape=(size, dim)) - dim = 0 - n = 0 - for j in range(len(arrays)): - new_n, new_dim = arrays[j].shape - out[n:n + new_n, dim:dim + new_dim] = arrays[j] - dim = dim + new_dim - n = n + new_n + out = np.zeros(shape=(size, dim)) + dim = 0 + n = 0 + for j in range(len(arrays)): + new_n, new_dim = arrays[j].shape + out[n : n + new_n, dim : dim + new_dim] = arrays[j] + dim = dim + new_dim + n = n + new_n - return out + return out def symsqrt(matrix): - """Compute the square root of a positive definite matrix.""" - # perform the decomposition - # s, v = matrix.symeig(eigenvectors=True) - _, s, v = matrix.svd() # passes torch.autograd.gradcheck() - # truncate small components - above_cutoff = s > s.max() * s.size(-1) * torch.finfo(s.dtype).eps - s = s[..., above_cutoff] - v = v[..., above_cutoff] - # compose the square root matrix - return (v * s.sqrt().unsqueeze(-2)) @ v.transpose(-2, -1) + """Compute the square root of a positive definite matrix.""" + # perform the decomposition + # s, v = matrix.symeig(eigenvectors=True) + _, s, v = matrix.svd() # passes torch.autograd.gradcheck() + # truncate small components + above_cutoff = s > s.max() * s.size(-1) * torch.finfo(s.dtype).eps + s = s[..., above_cutoff] + v = v[..., above_cutoff] + # compose the square root matrix + return (v * s.sqrt().unsqueeze(-2)) @ v.transpose(-2, -1) def interval(n, d, L_infinity_ball=1, offset=None): - if offset is None: - arrays = [np.linspace(-L_infinity_ball, 
L_infinity_ball, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - else: - arrays = [np.linspace(offset[i][0], offset[i][1], n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - return xtest + if offset is None: + arrays = [ + np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) + for i in range(d) + ] + xtest = cartesian(arrays) + else: + arrays = [ + np.linspace(offset[i][0], offset[i][1], n).reshape(n, 1) for i in range(d) + ] + xtest = cartesian(arrays) + return xtest def interval_torch(n, d, L_infinity_ball=1, offset=None): - return torch.from_numpy(interval(n, d, L_infinity_ball=L_infinity_ball, offset=offset)) + return torch.from_numpy( + interval(n, d, L_infinity_ball=L_infinity_ball, offset=offset) + ) def get_ecdf(x): - x = np.sort(x) + x = np.sort(x) - def result(v): - return np.searchsorted(x, v, side='right') / x.size + def result(v): + return np.searchsorted(x, v, side="right") / x.size - return result + return result def emprical_cdf(data): - """ - #>>> import numpy as np - #>>> emprical_cdf(np.array([1.,2.,3.,1.,2.])) - #[1.,2.,3.],[0.4,0.4,0.2] - """ - - # create a sorted series of unique data - cdfx = np.sort(np.unique(data)) - # x-data for the ECDF: evenly spaced sequence of the uniques - x_values = np.linspace(start=min(cdfx), - stop=max(cdfx), num=len(cdfx)) - - # size of the x_values - size_data = data.shape[0] - # y-data for the ECDF: - y_values = [] - for i in x_values: - # all the values in raw data less than the ith value in x_values - temp = data[data <= i] - # fraction of that value with respect to the size of the x_values - value = float(temp.shape[0]) / float(size_data) - # pushing the value in the y_values - y_values.append(value) - # return both x and y values - return x_values, np.array(y_values) + """ + #>>> import numpy as np + #>>> emprical_cdf(np.array([1.,2.,3.,1.,2.])) + #[1.,2.,3.],[0.4,0.4,0.2] + """ + + # create a sorted series of unique data + cdfx = np.sort(np.unique(data)) + # x-data for the ECDF: evenly spaced sequence of the uniques + x_values = np.linspace(start=min(cdfx), stop=max(cdfx), num=len(cdfx)) + + # size of the x_values + size_data = data.shape[0] + # y-data for the ECDF: + y_values = [] + for i in x_values: + # all the values in raw data less than the ith value in x_values + temp = data[data <= i] + # fraction of that value with respect to the size of the x_values + value = float(temp.shape[0]) / float(size_data) + # pushing the value in the y_values + y_values.append(value) + # return both x and y values + return x_values, np.array(y_values) def batch_jacobian(f, x, create_graph=False, vectorize=False): - f_sum = lambda x: torch.sum(f(x), axis=0) - return jacobian(f_sum, x, create_graph=create_graph, vectorize=vectorize) + f_sum = lambda x: torch.sum(f(x), axis=0) + return jacobian(f_sum, x, create_graph=create_graph, vectorize=vectorize) def batch_hessian(f, x, create_graph=False, vectorize=False, vv=False): - J = lambda x: batch_jacobian(f, x, create_graph=True, vectorize=vectorize).transpose(0, 1) - H = batch_jacobian(J, x, create_graph=create_graph, vectorize=vv) - return H + J = lambda x: batch_jacobian( + f, x, create_graph=True, vectorize=vectorize + ).transpose(0, 1) + H = batch_jacobian(J, x, create_graph=create_graph, vectorize=vv) + return H def create_pull_back(low, high, inverse=False, to=[-1, 1]): - translate = lambda x: x * (to[0] - to[1]) / (low - high) + to[1] - ((to[0] - to[1]) * high) / (low - high) - if inverse: - translate_back = lambda x: x * (low - high) / (to[0] - 
to[1]) + high - to[1] * (low - high) / (to[0] - to[1]) - return translate, translate_back - else: - return translate + translate = ( + lambda x: x * (to[0] - to[1]) / (low - high) + + to[1] + - ((to[0] - to[1]) * high) / (low - high) + ) + if inverse: + translate_back = ( + lambda x: x * (low - high) / (to[0] - to[1]) + + high + - to[1] * (low - high) / (to[0] - to[1]) + ) + return translate, translate_back + else: + return translate def hierarchical_distance(group1, group2): - group3 = copy.deepcopy(group2) - group4 = copy.deepcopy(group1) - for elem in group1: - try: - group3.remove(elem) - group4.remove(elem) - except: - pass - if len(group3) == 1 and len(group3[0]) == 1 and len(group4) == 0: - return 1 - - isin = lambda set, set2: [] - for a, b in list(itertools.product(group1, group1)): - new_group = copy.deepcopy(group1) - if a != b: - new_group.remove(b) - new_group.remove(a) - new_group.append(a + b) - if len(new_group) == len(group2) and all(i in new_group for i in group2): - return 1 - return 2 + group3 = copy.deepcopy(group2) + group4 = copy.deepcopy(group1) + for elem in group1: + try: + group3.remove(elem) + group4.remove(elem) + except: + pass + if len(group3) == 1 and len(group3[0]) == 1 and len(group4) == 0: + return 1 + + isin = lambda set, set2: [] + for a, b in list(itertools.product(group1, group1)): + new_group = copy.deepcopy(group1) + if a != b: + new_group.remove(b) + new_group.remove(a) + new_group.append(a + b) + if len(new_group) == len(group2) and all(i in new_group for i in group2): + return 1 + return 2 def valid_enlargement(curr, groups): - out = [] - for index, group in enumerate(groups): - if hierarchical_distance(curr, group) == 1: - out.append(index) - return out + out = [] + for index, group in enumerate(groups): + if hierarchical_distance(curr, group) == 1: + out.append(index) + return out def interval_groups(n, d, groups, L_infinity_ball=1): - arrays = [interval(n, len(groups[i]), L_infinity_ball=L_infinity_ball) for i in range(len(groups))] - xtest = direct_sum(arrays) - out = np.zeros(shape=(xtest.shape[0], d)) - out[:, 0:xtest.shape[1]] = xtest - return out + arrays = [ + interval(n, len(groups[i]), L_infinity_ball=L_infinity_ball) + for i in range(len(groups)) + ] + xtest = direct_sum(arrays) + out = np.zeros(shape=(xtest.shape[0], d)) + out[:, 0 : xtest.shape[1]] = xtest + return out def logsumexp(a, axis=None, b=None): - a = np.asarray(a) - if axis is None: - a = a.ravel() - else: - a = np.rollaxis(a, axis) - a_max = a.max(axis=0) - if b is not None: - b = np.asarray(b) - if axis is None: - b = b.ravel() - else: - b = np.rollaxis(b, axis) - out = np.log(np.sum(b * np.exp(a - a_max), axis=0)) - else: - out = np.log(np.sum(np.exp(a - a_max), axis=0)) - out += a_max - return out + a = np.asarray(a) + if axis is None: + a = a.ravel() + else: + a = np.rollaxis(a, axis) + a_max = a.max(axis=0) + if b is not None: + b = np.asarray(b) + if axis is None: + b = b.ravel() + else: + b = np.rollaxis(b, axis) + out = np.log(np.sum(b * np.exp(a - a_max), axis=0)) + else: + out = np.log(np.sum(np.exp(a - a_max), axis=0)) + out += a_max + return out class MyBounds(object): - def __init__(self, xmax=[1.1, 1.1], xmin=[-1.1, -1.1]): - self.xmax = np.array(xmax) - self.xmin = np.array(xmin) + def __init__(self, xmax=[1.1, 1.1], xmin=[-1.1, -1.1]): + self.xmax = np.array(xmax) + self.xmin = np.array(xmin) - def __call__(self, **kwargs): - x = kwargs["x_new"] - tmax = bool(np.all(x <= self.xmax)) - tmin = bool(np.all(x >= self.xmin)) - return tmax and tmin + def 
__call__(self, **kwargs): + x = kwargs["x_new"] + tmax = bool(np.all(x <= self.xmax)) + tmin = bool(np.all(x >= self.xmin)) + return tmax and tmin def full_group(d): - g = [] - for i in range(d): - g.append([i]) - return g + g = [] + for i in range(d): + g.append([i]) + return g def pair_groups(d): - g = [] - for i in range(d - 1): - g.append([i, i + 1]) - return g + g = [] + for i in range(d - 1): + g.append([i, i + 1]) + return g def conditional_decorator(dec, condition): - def decorator(func): - if not condition: - # Return the function unchanged, not decorated. - return func - return dec(func) + def decorator(func): + if not condition: + # Return the function unchanged, not decorated. + return func + return dec(func) + + return decorator - return decorator def generate_all_pairs(d): - groups = [] - for elem in range(d): - for elem2 in range(d): - groups.append([elem, elem2]) - return groups + groups = [] + for elem in range(d): + for elem2 in range(d): + groups.append([elem, elem2]) + return groups def generate_groups(d, elements=None): - """ - returns a list of all possible groups combinations of d elements - :param d: integer - :return: - >>> generate_groups(1) - [[0]] - >>> generate_groups(2) - [[[0], [1]], [[1], [0]], [[0, 1]]] - """ - if elements is None: - elements = list(range(d)) - g = [] - if len(elements) == 1: - return [elements] - - for r in range(1, d + 1, 1): - gn = [list(a) for a in list(itertools.combinations(elements, r))] - for i in gn: - elements2 = list(set(elements) - set(i)) - g.append([i] + generate_groups(d, elements=elements2)) - return g + """ + returns a list of all possible groups combinations of d elements + :param d: integer + :return: + >>> generate_groups(1) + [[0]] + >>> generate_groups(2) + [[[0], [1]], [[1], [0]], [[0, 1]]] + """ + if elements is None: + elements = list(range(d)) + g = [] + if len(elements) == 1: + return [elements] + + for r in range(1, d + 1, 1): + gn = [list(a) for a in list(itertools.combinations(elements, r))] + for i in gn: + elements2 = list(set(elements) - set(i)) + g.append([i] + generate_groups(d, elements=elements2)) + return g class results: - def __init__(self): - self.x = 0 + def __init__(self): + self.x = 0 def proj(x, bounds): - y = np.zeros(shape=x.shape) - for ind, elem in enumerate(x): - if elem > bounds[ind][1]: - y[ind] = bounds[ind][1] + y = np.zeros(shape=x.shape) + for ind, elem in enumerate(x): + if elem > bounds[ind][1]: + y[ind] = bounds[ind][1] - elif elem < bounds[ind][0]: - y[ind] = bounds[ind][0] + elif elem < bounds[ind][0]: + y[ind] = bounds[ind][0] - else: - y[ind] = elem - return y + else: + y[ind] = elem + return y def lambda_coordinate(fun, x0, index, x): - x0[index] = x - r = fun(x0) - return r - - -def projected_gradient_descent(fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001): - i = 0 - x_old = x + np.random.randn(x.shape[0]) - while (i < maxit and np.linalg.norm(x - x_old) > tol): - x_old = x - x = x - (100 * nu) * grad(x) - x = proj(x, bounds) - - if verbose == True: - print("Iteration: ", i, " ", fun(x)) - i += 1 - res = results() - res.x = x - return res - - -def projected_gradient_descent(fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001): - i = 0 - x_old = x + np.random.randn(x.shape[0]) - while (i < maxit and np.linalg.norm(x - x_old) > tol): - x_old = x - x = x - (100 * nu) * grad(x) - x = proj(x, bounds) - - if verbose == True: - print("Iteration: ", i, " ", fun(x)) - i += 1 - res = results() - res.x = x - return res + x0[index] = x + r = 
fun(x0) + return r + + +def projected_gradient_descent( + fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001 +): + i = 0 + x_old = x + np.random.randn(x.shape[0]) + while i < maxit and np.linalg.norm(x - x_old) > tol: + x_old = x + x = x - (100 * nu) * grad(x) + x = proj(x, bounds) + + if verbose == True: + print("Iteration: ", i, " ", fun(x)) + i += 1 + res = results() + res.x = x + return res + + +def projected_gradient_descent( + fun, grad, x, bounds, maxit=10e23, verbose=False, tol=0.000001, nu=0.001 +): + i = 0 + x_old = x + np.random.randn(x.shape[0]) + while i < maxit and np.linalg.norm(x - x_old) > tol: + x_old = x + x = x - (100 * nu) * grad(x) + x = proj(x, bounds) + + if verbose == True: + print("Iteration: ", i, " ", fun(x)) + i += 1 + res = results() + res.x = x + return res def complex_step_derivative(fun, h, x): - d = x.shape[1] - der = np.zeros(shape=(1, d)) - for i in range(d): - one = np.zeros(shape=(1, d)) - one[0, i] = 1.0 - der[0, i] = np.imag((fun(x + 1j * h * one) - fun(x))) / h - return der + d = x.shape[1] + der = np.zeros(shape=(1, d)) + for i in range(d): + one = np.zeros(shape=(1, d)) + one[0, i] = 1.0 + der[0, i] = np.imag((fun(x + 1j * h * one) - fun(x))) / h + return der def finite_differences(fun, h, x): - d = x.size()[1] - der = torch.zeros(size=(1, d), dtype=torch.float64) - for i in range(d): - one = torch.zeros(size=(1, d), dtype=torch.float64) - one[0, i] = 1.0 - der[0, i] = (fun(x + one * h) - fun(x)) / h - return der + d = x.size()[1] + der = torch.zeros(size=(1, d), dtype=torch.float64) + for i in range(d): + one = torch.zeros(size=(1, d), dtype=torch.float64) + one[0, i] = 1.0 + der[0, i] = (fun(x + one * h) - fun(x)) / h + return der def finite_differences_hessian(fun, h, x): - d = x.size()[1] - hess = torch.zeros(size=(d, d), dtype=torch.float64) - for i in range(d): - for j in range(d): - one_i = torch.zeros(size=(1, d), dtype=torch.float64) - one_j = torch.zeros(size=(1, d), dtype=torch.float64) - one_i[0, i] = 1.0 - one_j[0, j] = 1.0 - hess[i, j] = np.log( - np.abs(fun(x + h * one_i + h * one_j) - fun(x + h * one_i) - fun(x + h * one_j) + fun(x))) - 2 * np.log( - h) - - hess = torch.exp(hess) - return (hess + torch.t(hess)) / 2. 
+ d = x.size()[1] + hess = torch.zeros(size=(d, d), dtype=torch.float64) + for i in range(d): + for j in range(d): + one_i = torch.zeros(size=(1, d), dtype=torch.float64) + one_j = torch.zeros(size=(1, d), dtype=torch.float64) + one_i[0, i] = 1.0 + one_j[0, j] = 1.0 + hess[i, j] = np.log( + np.abs( + fun(x + h * one_i + h * one_j) + - fun(x + h * one_i) + - fun(x + h * one_j) + + fun(x) + ) + ) - 2 * np.log(h) + + hess = torch.exp(hess) + return (hess + torch.t(hess)) / 2.0 def finite_differences_np(fun, h, x): - d = x.shape[0] - der = np.zeros(shape=(d)) - for i in range(d): - one = np.zeros(shape=(d)) - one[i] = 1.0 - der[i] = (fun(x + one * h) - fun(x)) / h - return der + d = x.shape[0] + der = np.zeros(shape=(d)) + for i in range(d): + one = np.zeros(shape=(d)) + one[i] = 1.0 + der[i] = (fun(x + one * h) - fun(x)) / h + return der -def finite_differences_test(fun, fun_der, x, h_max=1.): - n = 10 - for i in range(n): - h = 2 ** (-i) * h_max - approx_nabla = finite_differences_np(fun, h, x) - print(i, h, np.linalg.norm(approx_nabla - fun_der(x))) +def finite_differences_test(fun, fun_der, x, h_max=1.0): + n = 10 + for i in range(n): + h = 2 ** (-i) * h_max + approx_nabla = finite_differences_np(fun, h, x) + print(i, h, np.linalg.norm(approx_nabla - fun_der(x))) def sample_custom(inverse_cumulative_distribution, size=(1, 1)): - U = np.random.uniform(0, 1, size=size) - F = np.vectorize(inverse_cumulative_distribution) - Z = F(U) - return Z + U = np.random.uniform(0, 1, size=size) + F = np.vectorize(inverse_cumulative_distribution) + Z = F(U) + return Z def select_subset(M, S): - d = M.shape[0] - I = np.zeros(shape=(d, d)) - I[S, S] = 1. - return I @ M @ I + d = M.shape[0] + I = np.zeros(shape=(d, d)) + I[S, S] = 1.0 + return I @ M @ I def select_subset_inv(M, S): - M = select_subset(M, S) - return np.linalg.pinv(M) + M = select_subset(M, S) + return np.linalg.pinv(M) def complement_set(S, size): - V = set(np.arange(0, size, 1)) - s = V - set(S) - S_C = list(s) - return S_C + V = set(np.arange(0, size, 1)) + s = V - set(S) + S_C = list(s) + return S_C def add_element(elements, new_element): - new_out = [] - for element in elements: - new_out.append(element + [[new_element]]) - new_out.append(element) - for j in element: - new = copy.deepcopy(element) - new.remove(j) - new.append(j + [new_element]) - new_out.append(new) + new_out = [] + for element in elements: + new_out.append(element + [[new_element]]) + new_out.append(element) + for j in element: + new = copy.deepcopy(element) + new.remove(j) + new.append(j + [new_element]) + new_out.append(new) - return new_out + return new_out def get_hierarchy(start=1, new_elements=[2, 3, 4]): - elements = [[[start]]] - for new_element in new_elements: - elements = add_element(elements, new_element) - l = [] - for element in elements: - l.append(np.sum([3 ** len(e) for e in element])) - indices = np.argsort(l) - out = [] - for index in indices: - out.append(elements[index]) - return out + elements = [[[start]]] + for new_element in new_elements: + elements = add_element(elements, new_element) + l = [] + for element in elements: + l.append(np.sum([3 ** len(e) for e in element])) + indices = np.argsort(l) + out = [] + for index in indices: + out.append(elements[index]) + return out def likelihood_bernoulli_test(alpha, delta, failure): - if alpha == 1.: - alpha = 0.99999 + if alpha == 1.0: + alpha = 0.99999 - p = (1 - (np.log(alpha / delta)) / np.log((1 - alpha) / (1 - delta))) ** (-1) + p = (1 - (np.log(alpha / delta)) / np.log((1 - alpha) / (1 - 
delta))) ** (-1) - dkl = p * np.log(p / delta) + (1 - p) * np.log((1 - p) / (1 - delta)) - n = np.log(2 / failure) / dkl - k = n * p - return n, k + dkl = p * np.log(p / delta) + (1 - p) * np.log((1 - p) / (1 - delta)) + n = np.log(2 / failure) / dkl + k = n * p + return n, k def median_of_means(list, delta=0.01): - r = list.shape[0] - if r > 3: - k = r - N = int(np.floor(r / k)) - means = [] - for j in range(k - 1): - means.append((1. / N) * np.sum(list[(j * N):(j + 1) * N])) - return np.median(means) - else: - return 0. - - -def get_indices(xtest,x): - """ - Find location of vectors in a larger set - :param xtest: torch.Tensor, tensor to be located - :param x: torch.Tensor, to be located in xtest - :return: list, if None its means it was not found in the original tensor - """ - - indices = [] - for i in range(x.size()[0]): - xtrial = x[i,:] - mask = torch.all(xtest == xtrial, dim=1) - if torch.sum(mask) > 0: - index = int(torch.argmax(mask.int())) - indices.append(index) - else: - indices.append(None) - - return indices + r = list.shape[0] + if r > 3: + k = r + N = int(np.floor(r / k)) + means = [] + for j in range(k - 1): + means.append((1.0 / N) * np.sum(list[(j * N) : (j + 1) * N])) + return np.median(means) + else: + return 0.0 + + +def get_indices(xtest, x): + """ + Find location of vectors in a larger set + :param xtest: torch.Tensor, tensor to be located + :param x: torch.Tensor, to be located in xtest + :return: list, if None its means it was not found in the original tensor + """ + + indices = [] + for i in range(x.size()[0]): + xtrial = x[i, :] + mask = torch.all(xtest == xtrial, dim=1) + if torch.sum(mask) > 0: + index = int(torch.argmax(mask.int())) + indices.append(index) + else: + indices.append(None) + + return indices + if __name__ == "__main__": - x = torch.arange(0,9,1).reshape(3,3) - xtrial = torch.Tensor([[0,1,2],[6,7,8],[3,4,5]]) - print (x) - print (get_indices(x,xtrial)) + x = torch.arange(0, 9, 1).reshape(3, 3) + xtrial = torch.Tensor([[0, 1, 2], [6, 7, 8], [3, 4, 5]]) + print(x) + print(get_indices(x, xtrial)) diff --git a/stpy/helpers/plot_helper.py b/stpy/helpers/plot_helper.py index 1b2eece..3b891b7 100644 --- a/stpy/helpers/plot_helper.py +++ b/stpy/helpers/plot_helper.py @@ -4,116 +4,133 @@ import webcolors -def plot_ellipse(offset, cov, scale=1, theta_num=1e3, axis=None, plot_kwargs=None, fill=False, fill_kwargs=None): - ''' - offset = 2d array which gives center of ellipse - cov = covariance of ellipse - scale = scale ellipse by constant factor - theta_num = used for a linspace below, not sure exactly (?) - - ''' - # Get Ellipse Properties from cov matrix - - eig_vec, eig_val, u = np.linalg.svd(cov) - # Make sure 0th eigenvector has positive x-coordinate - if eig_vec[0][0] < 0: - eig_vec[0] *= -1 - - semimaj = np.sqrt(eig_val[0]) - semimin = np.sqrt(eig_val[1]) - semimaj *= scale - semimin *= scale - - phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0]))) - if eig_vec[0][1] < 0 and phi > 0: - phi *= -1 - - # Generate data for ellipse structure - theta = np.linspace(0, 2 * np.pi, theta_num) - r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2) - x = r * np.cos(theta) - y = r * np.sin(theta) - data = np.array([x, y]) - S = np.array([[semimaj, 0], [0, semimin]]) - R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]]) - T = np.dot(R, S) - data = np.dot(T, data) - data[0] += offset[0] - data[1] += offset[1] - - # Plot! 
- return_fig = False - if axis is None: - axis = plt.gca() - - if plot_kwargs is None: - p, = axis.plot(data[0], data[1], color='r', linestyle='-') - else: - p, = axis.plot(data[0], data[1], **plot_kwargs) - - if fill == True: - if fill_kwargs is None: - fill_kwargs = dict() - axis.fill(data[0], data[1], alpha=0.2, color='r') +def plot_ellipse( + offset, + cov, + scale=1, + theta_num=1e3, + axis=None, + plot_kwargs=None, + fill=False, + fill_kwargs=None, +): + """ + offset = 2d array which gives center of ellipse + cov = covariance of ellipse + scale = scale ellipse by constant factor + theta_num = used for a linspace below, not sure exactly (?) + + """ + # Get Ellipse Properties from cov matrix + + eig_vec, eig_val, u = np.linalg.svd(cov) + # Make sure 0th eigenvector has positive x-coordinate + if eig_vec[0][0] < 0: + eig_vec[0] *= -1 + + semimaj = np.sqrt(eig_val[0]) + semimin = np.sqrt(eig_val[1]) + semimaj *= scale + semimin *= scale + + phi = np.arccos(np.dot(eig_vec[0], np.array([1, 0]))) + if eig_vec[0][1] < 0 and phi > 0: + phi *= -1 + + # Generate data for ellipse structure + theta = np.linspace(0, 2 * np.pi, theta_num) + r = 1 / np.sqrt((np.cos(theta)) ** 2 + (np.sin(theta)) ** 2) + x = r * np.cos(theta) + y = r * np.sin(theta) + data = np.array([x, y]) + S = np.array([[semimaj, 0], [0, semimin]]) + R = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]]) + T = np.dot(R, S) + data = np.dot(T, data) + data[0] += offset[0] + data[1] += offset[1] + + # Plot! + return_fig = False + if axis is None: + axis = plt.gca() + + if plot_kwargs is None: + (p,) = axis.plot(data[0], data[1], color="r", linestyle="-") + else: + (p,) = axis.plot(data[0], data[1], **plot_kwargs) + + if fill == True: + if fill_kwargs is None: + fill_kwargs = dict() + axis.fill(data[0], data[1], alpha=0.2, color="r") def closest_colour(requested_colour): - min_colours = {} - for name, key in webcolors.css3_hex_to_names.items(): - r_c, g_c, b_c = webcolors.hex_to_rgb(key) - rd = (r_c - requested_colour[0]) ** 2 - gd = (g_c - requested_colour[1]) ** 2 - bd = (b_c - requested_colour[2]) ** 2 - min_colours[(rd + gd + bd)] = name - return min_colours[min(min_colours.keys())] + min_colours = {} + for name, key in webcolors.css3_hex_to_names.items(): + r_c, g_c, b_c = webcolors.hex_to_rgb(key) + rd = (r_c - requested_colour[0]) ** 2 + gd = (g_c - requested_colour[1]) ** 2 + bd = (b_c - requested_colour[2]) ** 2 + min_colours[(rd + gd + bd)] = name + return min_colours[min(min_colours.keys())] def get_colour_name(requested_colour): - try: - closest_name = actual_name = webcolors.rgb_to_name(requested_colour) - except ValueError: - closest_name = closest_colour(requested_colour) - actual_name = None - return actual_name, closest_name - - -def colorline(x, y, z=None, cmap=plt.get_cmap('copper'), norm=plt.Normalize(0.0, 1.0), - linewidth=3, alpha=1.0): - """ - http://nbviewer.ipython.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb - http://matplotlib.org/examples/pylab_examples/multicolored_line.html - Plot a colored line with coordinates x and y - Optionally specify colors in the array z - Optionally specify a colormap, a norm function and a line width - """ - - # Default colors equally spaced on [0,1]: - if z is None: - z = np.linspace(0.0, 1.0, len(x)) - - # Special case if a single number: - if not hasattr(z, "__iter__"): # to check for numerical input -- this is a hack - z = np.array([z]) - - z = np.asarray(z) - - segments = make_segments(x, y) - lc = mcoll.LineCollection(segments, 
array=z, cmap=cmap, norm=norm, - linewidth=linewidth, alpha=alpha) - - ax = plt.gca() - ax.add_collection(lc) - - return lc + try: + closest_name = actual_name = webcolors.rgb_to_name(requested_colour) + except ValueError: + closest_name = closest_colour(requested_colour) + actual_name = None + return actual_name, closest_name + + +def colorline( + x, + y, + z=None, + cmap=plt.get_cmap("copper"), + norm=plt.Normalize(0.0, 1.0), + linewidth=3, + alpha=1.0, +): + """ + http://nbviewer.ipython.org/github/dpsanders/matplotlib-examples/blob/master/colorline.ipynb + http://matplotlib.org/examples/pylab_examples/multicolored_line.html + Plot a colored line with coordinates x and y + Optionally specify colors in the array z + Optionally specify a colormap, a norm function and a line width + """ + + # Default colors equally spaced on [0,1]: + if z is None: + z = np.linspace(0.0, 1.0, len(x)) + + # Special case if a single number: + if not hasattr(z, "__iter__"): # to check for numerical input -- this is a hack + z = np.array([z]) + + z = np.asarray(z) + + segments = make_segments(x, y) + lc = mcoll.LineCollection( + segments, array=z, cmap=cmap, norm=norm, linewidth=linewidth, alpha=alpha + ) + + ax = plt.gca() + ax.add_collection(lc) + + return lc def make_segments(x, y): - """ - Create list of line segments from x and y coordinates, in the correct format - for LineCollection: an array of the form numlines x (points per line) x 2 (x - and y) array - """ - - points = np.array([x, y]).T.reshape(-1, 1, 2) - segments = np.concatenate([points[:-1], points[1:]], axis=1) - return segments + """ + Create list of line segments from x and y coordinates, in the correct format + for LineCollection: an array of the form numlines x (points per line) x 2 (x + and y) array + """ + + points = np.array([x, y]).T.reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + return segments diff --git a/stpy/helpers/plotting_helper.py b/stpy/helpers/plotting_helper.py index 6a4fa7d..c350229 100644 --- a/stpy/helpers/plotting_helper.py +++ b/stpy/helpers/plotting_helper.py @@ -2,7 +2,18 @@ import sklearn -def plot_R2(vals, lcb, ucb, truth, s, truth_lcb=None, truth_ucb=None, show=False, save_file_name=None, name=None): +def plot_R2( + vals, + lcb, + ucb, + truth, + s, + truth_lcb=None, + truth_ucb=None, + show=False, + save_file_name=None, + name=None, +): r2 = sklearn.metrics.r2_score(truth, vals) if save_file_name is not None: filename = save_file_name @@ -15,22 +26,28 @@ def plot_R2(vals, lcb, ucb, truth, s, truth_lcb=None, truth_ucb=None, show=False plt.xlabel("true") plt.ylabel("predicted") - plt.plot(truth, truth, 'k-') - plt.plot(truth, truth + s, 'k--') - plt.plot(truth, truth - s, 'k--') - plt.plot(truth, vals, color='k', marker='o', linestyle='') - - plt.errorbar(truth, vals, yerr=vals - lcb, color='k', marker='o', linestyle='') + plt.plot(truth, truth, "k-") + plt.plot(truth, truth + s, "k--") + plt.plot(truth, truth - s, "k--") + plt.plot(truth, vals, color="k", marker="o", linestyle="") + plt.errorbar(truth, vals, yerr=vals - lcb, color="k", marker="o", linestyle="") if save_file_name is not None: plt.savefig(filename + "_0.png", dpi=150) - plt.errorbar(truth, vals, yerr=vals - lcb + 2 * s, color='r', marker='o', linestyle='', zorder = -10) + plt.errorbar( + truth, + vals, + yerr=vals - lcb + 2 * s, + color="r", + marker="o", + linestyle="", + zorder=-10, + ) if save_file_name is not None: plt.savefig(filename + "_1.png", dpi=150) - if show: plt.show() diff --git 
a/stpy/helpers/posterior_sampling.py b/stpy/helpers/posterior_sampling.py index 2318f27..632f0e4 100644 --- a/stpy/helpers/posterior_sampling.py +++ b/stpy/helpers/posterior_sampling.py @@ -7,214 +7,214 @@ class HmcSampler: - min_t = 0.00001 - - def __init__(self, dim, init, f, g, verbose): - """ - - :param dim: dimension - :param init: (dim, ), the initial value for HMC - :param f: (q, dim), coefficient for linear constraints - :param g: (q,), linear constraints: f*X+g >= 0 - """ - self.dim = dim - self.lastSample = init - self.f = f - self.g = g - self.verbose = verbose - - def getNextLinearHitTime(self, a, b): - """ - the position x(t) = a * sin(t) + b * cos(t) - - :param a: (dim, ) initial value for a (initial velocity) - :param b: (dim, ) initial value for b (initial position) - :return: hit_time: the time for the hit - cn : the cn-th constraint is active at hit time. - """ - hit_time = 0 - cn = 0 - - if self.f is None: - return hit_time, cn - - f = self.f - g = self.g - for i in range(f.shape[0]): - # constraints: f[i].dot(x)+g[i] >= 0 - fa = f[i].dot(a) - fb = f[i].dot(b) - u = np.sqrt(fa * fa + fb * fb) - # if u > g[i] and u > -g[i]: - if -u < g[i] < u: - # otherwise the constrain will always be satisfied - phi = np.arctan2(-fa, fb) # -pi < phi < pi - t1 = np.arccos(-g[i] / u) - phi # -pi < t1 < 2*pi - - if t1 < 0: - t1 += 2 * np.pi # 0 < t1 < 2*pi - if np.abs(t1) < self.min_t or \ - np.abs(t1 - 2 * np.pi) < self.min_t: - t1 = 0 - - t2 = -t1 - 2 * phi # -4*pi < t2 < 2*pi - if t2 < 0: - t2 += 2 * np.pi # -2*pi < t2 < 2*pi - if t2 < 0: - t2 += 2 * np.pi # 0 < t2 < 2*pi - - if np.abs(t2) < self.min_t or \ - np.abs(t2 - 2 * np.pi) < self.min_t: - t2 = 0 - - if t1 == 0: - t = t2 - elif t2 == 0: - t = t1 - else: - t = np.minimum(t1, t2) - - if self.min_t < t and (hit_time == 0 or t < hit_time): - hit_time = t - cn = i - return hit_time, cn - - def verifyConstraints(self, b): - """ - - :param b: - :return: - """ - if self.f is not None: - return np.min(self.f @ b + self.g) - else: - return 1 - - def sampleNext(self): - T = np.pi / 2 # how much time to move - b = self.lastSample - dim = self.dim - - count_sample_vel = 0 - - while True: - velsign = 0 - # sample new initial velocity - a = np.random.normal(0, 1, dim) - - count_sample_vel += 1 - if self.verbose and count_sample_vel % 50 == 0: - print("Has sampled %d times of initial velocity." 
% count_sample_vel) - - tt = T # the time left to move - while True: - t, c1 = self.getNextLinearHitTime(a, b) - # t: how much time to move to hit the boundary, if t == 0, move tt - # c1: the strict constraint at hit time - - if t == 0 or tt < t: - # if no wall to be hit (t == 0) or not enough - # time left to hit the wall (tt < t) - break - - tt -= t # time left to move after hitting the wall - new_b = np.sin(t) * a + np.cos(t) * b # hit location - hit_vel = np.cos(t) * a - np.sin(t) * b # hit velocity - b = new_b - # reflect the velocity and verify that it points in the right direction - f2 = np.dot(self.f[c1], self.f[c1]) - alpha = np.dot(self.f[c1], hit_vel) / f2 - a = hit_vel - 2 * alpha * self.f[c1] # reflected velocity - - velsign = a.dot(self.f[c1]) - - if velsign < 0: - # get out of inner while, resample the velocity and start again - # this occurs rarelly, due to numerical instabilities - break - - if velsign < 0: - # go to the beginning of outer while - continue - - bb = np.sin(tt) * a + np.cos(tt) * b - check = self.verifyConstraints(bb) - if check >= 0: - # verify that we don't violate the constraints - # due to a numerical instability - if self.verbose: - print("total number of velocity samples : %d" % count_sample_vel) - - self.lastSample = bb - return bb + min_t = 0.00001 + + def __init__(self, dim, init, f, g, verbose): + """ + + :param dim: dimension + :param init: (dim, ), the initial value for HMC + :param f: (q, dim), coefficient for linear constraints + :param g: (q,), linear constraints: f*X+g >= 0 + """ + self.dim = dim + self.lastSample = init + self.f = f + self.g = g + self.verbose = verbose + + def getNextLinearHitTime(self, a, b): + """ + the position x(t) = a * sin(t) + b * cos(t) + + :param a: (dim, ) initial value for a (initial velocity) + :param b: (dim, ) initial value for b (initial position) + :return: hit_time: the time for the hit + cn : the cn-th constraint is active at hit time. + """ + hit_time = 0 + cn = 0 + + if self.f is None: + return hit_time, cn + + f = self.f + g = self.g + for i in range(f.shape[0]): + # constraints: f[i].dot(x)+g[i] >= 0 + fa = f[i].dot(a) + fb = f[i].dot(b) + u = np.sqrt(fa * fa + fb * fb) + # if u > g[i] and u > -g[i]: + if -u < g[i] < u: + # otherwise the constrain will always be satisfied + phi = np.arctan2(-fa, fb) # -pi < phi < pi + t1 = np.arccos(-g[i] / u) - phi # -pi < t1 < 2*pi + + if t1 < 0: + t1 += 2 * np.pi # 0 < t1 < 2*pi + if np.abs(t1) < self.min_t or np.abs(t1 - 2 * np.pi) < self.min_t: + t1 = 0 + + t2 = -t1 - 2 * phi # -4*pi < t2 < 2*pi + if t2 < 0: + t2 += 2 * np.pi # -2*pi < t2 < 2*pi + if t2 < 0: + t2 += 2 * np.pi # 0 < t2 < 2*pi + + if np.abs(t2) < self.min_t or np.abs(t2 - 2 * np.pi) < self.min_t: + t2 = 0 + + if t1 == 0: + t = t2 + elif t2 == 0: + t = t1 + else: + t = np.minimum(t1, t2) + + if self.min_t < t and (hit_time == 0 or t < hit_time): + hit_time = t + cn = i + return hit_time, cn + + def verifyConstraints(self, b): + """ + + :param b: + :return: + """ + if self.f is not None: + return np.min(self.f @ b + self.g) + else: + return 1 + + def sampleNext(self): + T = np.pi / 2 # how much time to move + b = self.lastSample + dim = self.dim + + count_sample_vel = 0 + + while True: + velsign = 0 + # sample new initial velocity + a = np.random.normal(0, 1, dim) + + count_sample_vel += 1 + if self.verbose and count_sample_vel % 50 == 0: + print("Has sampled %d times of initial velocity." 
% count_sample_vel) + + tt = T # the time left to move + while True: + t, c1 = self.getNextLinearHitTime(a, b) + # t: how much time to move to hit the boundary, if t == 0, move tt + # c1: the strict constraint at hit time + + if t == 0 or tt < t: + # if no wall to be hit (t == 0) or not enough + # time left to hit the wall (tt < t) + break + + tt -= t # time left to move after hitting the wall + new_b = np.sin(t) * a + np.cos(t) * b # hit location + hit_vel = np.cos(t) * a - np.sin(t) * b # hit velocity + b = new_b + # reflect the velocity and verify that it points in the right direction + f2 = np.dot(self.f[c1], self.f[c1]) + alpha = np.dot(self.f[c1], hit_vel) / f2 + a = hit_vel - 2 * alpha * self.f[c1] # reflected velocity + + velsign = a.dot(self.f[c1]) + + if velsign < 0: + # get out of inner while, resample the velocity and start again + # this occurs rarelly, due to numerical instabilities + break + + if velsign < 0: + # go to the beginning of outer while + continue + + bb = np.sin(tt) * a + np.cos(tt) * b + check = self.verifyConstraints(bb) + if check >= 0: + # verify that we don't violate the constraints + # due to a numerical instability + if self.verbose: + print("total number of velocity samples : %d" % count_sample_vel) + + self.lastSample = bb + return bb def tmg(n, mu, M, initial, f=None, g=None, burn_in=30, verbose=False): - """ - This function generates samples from a Markov chain whose equilibrium distribution is a d-dimensional - multivariate Gaussian truncated by linear inequalities. The log probability density is - log p(X) = -0.5 (X-mu)^T M^-1 (X-mu) + const - in terms of a covariance matrix M and a mean vector mu. The constraints are imposed as explained below. - The Markov chain is built using the Hamiltonian Monte Carlo technique. - - :param n: Number of samples. - :param mu: (m,) vector for the mean of multivariate Gaussian density - :param M: (m,m) covariance matrix of the multivariate Gaussian density - :param initial: (m,) vector with the initial value of the Markov chain. Must satisfy - the truncation inequalities strictly. - :param f: (q,m) matrix, where q is the number of linear constraints. The constraints require each component - of the m-dimensional vector fX+g to be non-negative - :param g: (q,) vector with the constant terms in the above linear constraints. - :param burn_in: The number of burn-in iterations. The Markov chain is sampled n + burn_in - times, and the last n samples are returned. - :param verbose: - :return: (n, m) - """ - - dim = len(mu) - if M.shape[1] != dim: - raise ValueError("The covariance matrix must be square.") - - if len(initial) != dim: - raise ValueError("Wrong length for initial value vector.") - - # verify that M is positive definite, it will raise an error if M is not SPD - R = np.linalg.cholesky(M) - - # we change variable to the canonical frame, and transform back after sampling - # X ~ N(mu, M), then R^-1(X-mu) ~ N(0, I) - init_trans = scipy.linalg.solve(R, initial - mu) # the new initial value - - if f is not None: - if f.shape[0] != len(g) or f.shape[1] != dim: - raise ValueError("Inconsistent linear constraints. 
f must \ - be an d-by-m matrix and g an d-dimensional vector.") - # g may contains infinity, extract valid constraints - valid = np.logical_and(g < np.inf, g > -np.inf) - g = g[valid] - f = f[valid] - - # verify initial value satisfies linear constraints - if np.any(f @ initial + g < 0): - raise ValueError("Initial point violates linear constraints.") - - # map linear constraints to canonical frame - f_trans = f @ R - g_trans = f @ mu + g - - hmc = HmcSampler(dim, init_trans, f_trans, g_trans, verbose=verbose) - else: - hmc = HmcSampler(dim, init_trans, f, g, verbose=verbose) - - samples = np.zeros((n, dim)) - for i in range(burn_in): - if verbose: - print("=" * 30 + " (burn in) sample {} ".format(i) + "=" * 30) - hmc.sampleNext() - for i in range(n): - if verbose: - print("=" * 30 + " sample {} ".format(i) + "=" * 30) - samples[i] = hmc.sampleNext() - - # transform back - return samples @ R.T + mu + """ + This function generates samples from a Markov chain whose equilibrium distribution is a d-dimensional + multivariate Gaussian truncated by linear inequalities. The log probability density is + log p(X) = -0.5 (X-mu)^T M^-1 (X-mu) + const + in terms of a covariance matrix M and a mean vector mu. The constraints are imposed as explained below. + The Markov chain is built using the Hamiltonian Monte Carlo technique. + + :param n: Number of samples. + :param mu: (m,) vector for the mean of multivariate Gaussian density + :param M: (m,m) covariance matrix of the multivariate Gaussian density + :param initial: (m,) vector with the initial value of the Markov chain. Must satisfy + the truncation inequalities strictly. + :param f: (q,m) matrix, where q is the number of linear constraints. The constraints require each component + of the m-dimensional vector fX+g to be non-negative + :param g: (q,) vector with the constant terms in the above linear constraints. + :param burn_in: The number of burn-in iterations. The Markov chain is sampled n + burn_in + times, and the last n samples are returned. + :param verbose: + :return: (n, m) + """ + + dim = len(mu) + if M.shape[1] != dim: + raise ValueError("The covariance matrix must be square.") + + if len(initial) != dim: + raise ValueError("Wrong length for initial value vector.") + + # verify that M is positive definite, it will raise an error if M is not SPD + R = np.linalg.cholesky(M) + + # we change variable to the canonical frame, and transform back after sampling + # X ~ N(mu, M), then R^-1(X-mu) ~ N(0, I) + init_trans = scipy.linalg.solve(R, initial - mu) # the new initial value + + if f is not None: + if f.shape[0] != len(g) or f.shape[1] != dim: + raise ValueError( + "Inconsistent linear constraints. f must \ + be an d-by-m matrix and g an d-dimensional vector." 
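                # Shape convention: with q linear constraints f @ X + g >= 0,
                # f is (q, m) and g has length q.  Illustration (assumed, not
                # part of this module): a box l <= X <= u corresponds to
                # f = np.vstack([np.eye(m), -np.eye(m)]) and
                # g = np.concatenate([-l, u]).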
+ ) + # g may contains infinity, extract valid constraints + valid = np.logical_and(g < np.inf, g > -np.inf) + g = g[valid] + f = f[valid] + + # verify initial value satisfies linear constraints + if np.any(f @ initial + g < 0): + raise ValueError("Initial point violates linear constraints.") + + # map linear constraints to canonical frame + f_trans = f @ R + g_trans = f @ mu + g + + hmc = HmcSampler(dim, init_trans, f_trans, g_trans, verbose=verbose) + else: + hmc = HmcSampler(dim, init_trans, f, g, verbose=verbose) + + samples = np.zeros((n, dim)) + for i in range(burn_in): + if verbose: + print("=" * 30 + " (burn in) sample {} ".format(i) + "=" * 30) + hmc.sampleNext() + for i in range(n): + if verbose: + print("=" * 30 + " sample {} ".format(i) + "=" * 30) + samples[i] = hmc.sampleNext() + + # transform back + return samples @ R.T + mu diff --git a/stpy/helpers/quadrature_helper.py b/stpy/helpers/quadrature_helper.py index b03d28e..997cd27 100644 --- a/stpy/helpers/quadrature_helper.py +++ b/stpy/helpers/quadrature_helper.py @@ -6,253 +6,320 @@ def integrate_sin_sin(a, b, omega1, omega2): - """ - - :param a: - :param b: - :param omega1: - :param omega2: - :return: - >>> np.round(integrate_sin_sin(0.2,0.5,2,3),6) - 0.164678 - """ - eps = 10e-5 - if np.abs(omega1 - omega2) < eps: - F = lambda x: x / 2 - np.sin(2 * omega1 * x) / (4 * omega1) - else: - F = lambda x: (omega2 * np.sin(omega1 * x) * np.cos(x * omega2) - - omega1 * np.cos(omega1 * x) * np.sin(omega2 * x)) / (omega1 ** 2 - omega2 ** 2) - return F(b) - F(a) + """ + + :param a: + :param b: + :param omega1: + :param omega2: + :return: + >>> np.round(integrate_sin_sin(0.2,0.5,2,3),6) + 0.164678 + """ + eps = 10e-5 + if np.abs(omega1 - omega2) < eps: + F = lambda x: x / 2 - np.sin(2 * omega1 * x) / (4 * omega1) + else: + F = lambda x: ( + omega2 * np.sin(omega1 * x) * np.cos(x * omega2) + - omega1 * np.cos(omega1 * x) * np.sin(omega2 * x) + ) / (omega1**2 - omega2**2) + return F(b) - F(a) def integrate_sin_cos(a, b, omega1, omega2): - """ - - :param a: - :param b: - :param omega1: - :param omega2: - :return: - >>> np.round(integrate_sin_cos(0.2,0.5,2,3),6) - 0.082903 - """ - eps = 10e-5 - if np.abs(omega1 - omega2) < eps: - F = lambda x: -np.cos(omega1 * x) ** 2 / (2 * omega1) - else: - F = lambda x: -(omega2 * np.sin(omega1 * x) * np.sin(x * omega2) + - omega1 * np.cos(omega1 * x) * np.cos(omega2 * x)) / (omega1 ** 2 - omega2 ** 2) - return F(b) - F(a) + """ + + :param a: + :param b: + :param omega1: + :param omega2: + :return: + >>> np.round(integrate_sin_cos(0.2,0.5,2,3),6) + 0.082903 + """ + eps = 10e-5 + if np.abs(omega1 - omega2) < eps: + F = lambda x: -np.cos(omega1 * x) ** 2 / (2 * omega1) + else: + F = lambda x: -( + omega2 * np.sin(omega1 * x) * np.sin(x * omega2) + + omega1 * np.cos(omega1 * x) * np.cos(omega2 * x) + ) / (omega1**2 - omega2**2) + return F(b) - F(a) def integrate_cos_cos(a, b, omega1, omega2): - """ - - :param a: - :param b: - :param omega1: - :param omega2: - :return: - >>> np.round(integrate_cos_cos(0.2,0.5,2,3),6) - 0.116078 - """ - eps = 10e-5 - if np.abs(omega1 - omega2) < eps: - F = lambda x: x / 2 + np.sin(2 * omega1 * x) / (4 * omega1) - else: - F = lambda x: (omega1 * np.sin(omega1 * x) * np.cos(x * omega2) - - omega2 * np.cos(omega1 * x) * np.sin(omega2 * x)) / (omega1 ** 2 - omega2 ** 2) - return F(b) - F(a) + """ + + :param a: + :param b: + :param omega1: + :param omega2: + :return: + >>> np.round(integrate_cos_cos(0.2,0.5,2,3),6) + 0.116078 + """ + eps = 10e-5 + if np.abs(omega1 - omega2) < 
eps: + F = lambda x: x / 2 + np.sin(2 * omega1 * x) / (4 * omega1) + else: + F = lambda x: ( + omega1 * np.sin(omega1 * x) * np.cos(x * omega2) + - omega2 * np.cos(omega1 * x) * np.sin(omega2 * x) + ) / (omega1**2 - omega2**2) + return F(b) - F(a) def integrate2d_sin_sin(A, B, C, D, a, b, c, d): - Cos = lambda x: np.cos(x) - val = (1 / (2 * (b - d) * (b + d))) * (-(((b + d) * (Cos(a * A - A * c + b * C - C * d) - - Cos(a * B - B * c + b * C - C * d))) / (a - c)) + ( - (b + d) * (Cos(a * A - A * c + b * D - d * D) - - Cos(a * B - B * c + b * D - d * D))) / (a - c) + ( - 1 / ( - a + c)) * (b - d) * (Cos(A * (a + c) + C * (b + d)) - Cos( - B * (a + c) + C * (b + d)) - Cos(A * (a + c) + (b + d) * - D) + Cos(B * (a + c) + (b + d) * D))) - return val + Cos = lambda x: np.cos(x) + val = (1 / (2 * (b - d) * (b + d))) * ( + -( + ( + (b + d) + * ( + Cos(a * A - A * c + b * C - C * d) + - Cos(a * B - B * c + b * C - C * d) + ) + ) + / (a - c) + ) + + ( + (b + d) + * (Cos(a * A - A * c + b * D - d * D) - Cos(a * B - B * c + b * D - d * D)) + ) + / (a - c) + + (1 / (a + c)) + * (b - d) + * ( + Cos(A * (a + c) + C * (b + d)) + - Cos(B * (a + c) + C * (b + d)) + - Cos(A * (a + c) + (b + d) * D) + + Cos(B * (a + c) + (b + d) * D) + ) + ) + return val def integrate2d_sin_cos(A, B, C, D, a, b, c, d): - Sin = lambda x: np.sin(x) - val = (1 / (2 * (b - d) * (b + d))) * (((b + d) * (-Sin(a * A - A * c + b * C - C * d) + - Sin(a * B - B * c + b * C - C * d))) / (a - c) + ( - (b + d) * (Sin(a * A - A * c + b * D - d * D) - - Sin(a * B - B * c + b * D - d * D))) / (a - c) - ( - 1 / (a + c)) * (b - d) * (Sin(A * (a + c) + C * (b + d)) - - Sin(B * (a + c) + C * (b + d)) - Sin( - A * (a + c) + (b + d) * D) + - Sin(B * (a + c) + (b + d) * D))) - return val + Sin = lambda x: np.sin(x) + val = (1 / (2 * (b - d) * (b + d))) * ( + ( + (b + d) + * (-Sin(a * A - A * c + b * C - C * d) + Sin(a * B - B * c + b * C - C * d)) + ) + / (a - c) + + ( + (b + d) + * (Sin(a * A - A * c + b * D - d * D) - Sin(a * B - B * c + b * D - d * D)) + ) + / (a - c) + - (1 / (a + c)) + * (b - d) + * ( + Sin(A * (a + c) + C * (b + d)) + - Sin(B * (a + c) + C * (b + d)) + - Sin(A * (a + c) + (b + d) * D) + + Sin(B * (a + c) + (b + d) * D) + ) + ) + return val def integrate2d_cos_cos(A, B, C, D, a, b, c, d): - Cos = lambda x: np.cos(x) - val = -(1 / (2 * (b - d) * (b + d))) * (((b + d)(Cos(a * A - A * c + b * C - C * d) - - Cos(a * B - B * c + b * C - C * d))) / ( - a - c) - ((b + d) * (Cos(a * A - A * c + b * D - d * D) - - Cos(a * B - B * c + b * D - d * D))) / ( - a - c) + (1 / ( - a + c)) * (b - d) * (Cos(A * (a + c) + C * (b + d)) - - Cos(B * (a + c) + C * (b + d)) - Cos(A * (a + c) + (b + d) * D) + Cos( - B * (a + c) + (b + d) * D))) - return val + Cos = lambda x: np.cos(x) + val = -(1 / (2 * (b - d) * (b + d))) * ( + ( + (b + d)( + Cos(a * A - A * c + b * C - C * d) - Cos(a * B - B * c + b * C - C * d) + ) + ) + / (a - c) + - ( + (b + d) + * (Cos(a * A - A * c + b * D - d * D) - Cos(a * B - B * c + b * D - d * D)) + ) + / (a - c) + + (1 / (a + c)) + * (b - d) + * ( + Cos(A * (a + c) + C * (b + d)) + - Cos(B * (a + c) + C * (b + d)) + - Cos(A * (a + c) + (b + d) * D) + + Cos(B * (a + c) + (b + d) * D) + ) + ) + return val def integrate_sin_multidimensional(a, b, omegas): - """ - - :param a: bounds start - :param b: bounds end - :param omegas: frequencies - :return: - >>> np.round(integrate_sin_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5) - 0.47822 - >>> 
np.round(integrate_sin_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5) - -0.01037 - >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5) - 0.02232 - """ - d = omegas.shape[0] - - z = np.array([omegas * b, omegas * a]) - sign = np.array([omegas * 0, omegas * 0 + 1]) - ar = cartesian([z[:, i] for i in range(z.shape[1])]) - signs = cartesian([sign[:, i] for i in range(sign.shape[1])]) - signs = np.sum(signs, axis=1) - ar = np.sum(ar, axis=1) - k = 1. / np.prod(omegas) - # print (ar) - - if d % 2 == 1: - r = np.cos(ar) - if d % 4 == 1: - r = -r - for i in range(r.shape[0]): - if signs[i] % 2 == 1: - r[i] = -r[i] - else: - r = np.sin(ar) - if d % 4 == 3: - r = -r - for i in range(r.shape[0]): - if signs[i] % 2 == 0: - r[i] = -r[i] - return k * np.sum(r) + """ + + :param a: bounds start + :param b: bounds end + :param omegas: frequencies + :return: + >>> np.round(integrate_sin_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5) + 0.47822 + >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5) + -0.01037 + >>> np.round(integrate_sin_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5) + 0.02232 + """ + d = omegas.shape[0] + + z = np.array([omegas * b, omegas * a]) + sign = np.array([omegas * 0, omegas * 0 + 1]) + ar = cartesian([z[:, i] for i in range(z.shape[1])]) + signs = cartesian([sign[:, i] for i in range(sign.shape[1])]) + signs = np.sum(signs, axis=1) + ar = np.sum(ar, axis=1) + k = 1.0 / np.prod(omegas) + # print (ar) + + if d % 2 == 1: + r = np.cos(ar) + if d % 4 == 1: + r = -r + for i in range(r.shape[0]): + if signs[i] % 2 == 1: + r[i] = -r[i] + else: + r = np.sin(ar) + if d % 4 == 3: + r = -r + for i in range(r.shape[0]): + if signs[i] % 2 == 0: + r[i] = -r[i] + return k * np.sum(r) def integrate_cos_multidimensional(a, b, omegas): - """ - - :param a: bounds start - :param b: bounds end - :param omegas: frequencies - :return: - >>> np.round(integrate_cos_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5) - 0.03391 - >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5) - 0.03169 - >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5) - -0.03784 - """ - d = omegas.shape[0] - - z = np.array([omegas * b, omegas * a]) - sign = np.array([omegas * 0, omegas * 0 + 1]) - # print(z) - ar = cartesian([z[:, i] for i in range(z.shape[1])]) - signs = cartesian([sign[:, i] for i in range(sign.shape[1])]) - signs = np.sum(signs, axis=1) - ar = np.sum(ar, axis=1) - k = 1. 
/ np.prod(omegas) - # print (ar) - - if d % 2 == 1: - r = np.sin(ar) - if d % 4 == 3: - r = -r - for i in range(r.shape[0]): - if signs[i] % 2 == 1: - r[i] = -r[i] - else: - r = np.cos(ar) - if d % 4 == 1: - r = -r - for i in range(r.shape[0]): - if signs[i] % 2 == 0: - r[i] = -r[i] - - return k * np.sum(r) + """ + + :param a: bounds start + :param b: bounds end + :param omegas: frequencies + :return: + >>> np.round(integrate_cos_multidimensional(np.array([0.5]),np.array([1.]),np.array([2.])),5) + 0.03391 + >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3]),np.array([1.,4.]),np.array([2.,5.])),5) + 0.03169 + >>> np.round(integrate_cos_multidimensional(np.array([0.5,0.3,0.8]),np.array([1.,4.,3.1]),np.array([2.,5.,1.5])),5) + -0.03784 + """ + d = omegas.shape[0] + + z = np.array([omegas * b, omegas * a]) + sign = np.array([omegas * 0, omegas * 0 + 1]) + # print(z) + ar = cartesian([z[:, i] for i in range(z.shape[1])]) + signs = cartesian([sign[:, i] for i in range(sign.shape[1])]) + signs = np.sum(signs, axis=1) + ar = np.sum(ar, axis=1) + k = 1.0 / np.prod(omegas) + # print (ar) + + if d % 2 == 1: + r = np.sin(ar) + if d % 4 == 3: + r = -r + for i in range(r.shape[0]): + if signs[i] % 2 == 1: + r[i] = -r[i] + else: + r = np.cos(ar) + if d % 4 == 1: + r = -r + for i in range(r.shape[0]): + if signs[i] % 2 == 0: + r[i] = -r[i] + + return k * np.sum(r) def romberg2d(func, x1, x2, y1, y2): - """ - - :param func: - :param x1: - :param x2: - :param y1: - :param y2: - :return: - >>> np.round(romberg2d(lambda x,y:2*x**2+y**2,0,1,1,2),5) - 3.0 - """ - func2 = lambda y, a, b: integrate.romberg(func, a, b, args=(y,)) - return integrate.romberg(func2, y1, y2, args=(x1, x2)) - - -def quadvec2(func, x1, x2, y1, y2, epsabs=1e-200, epsrel=1e-08, limit=1000, workers=1, quadrature='gk21'): - """ - >>> alpha = np.linspace(0.0, 2.0, num=30) - >>> np.round(quadvec2(lambda x,y: x**alpha + y**alpha,0,1,1,2)[0],5) - 2.0 - >>> np.round(quadvec2(lambda x,y: 2*x**alpha + y**alpha,0,1,1,2)[-1],5) - 3.0 - """ - func2 = lambda y: \ - integrate.quad_vec(lambda x: func(x, y), x1, x2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature)[ - 0] - res = integrate.quad_vec(func2, y1, y2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature) - return res[0] + """ + + :param func: + :param x1: + :param x2: + :param y1: + :param y2: + :return: + >>> np.round(romberg2d(lambda x,y:2*x**2+y**2,0,1,1,2),5) + 3.0 + """ + func2 = lambda y, a, b: integrate.romberg(func, a, b, args=(y,)) + return integrate.romberg(func2, y1, y2, args=(x1, x2)) + + +def quadvec2( + func, + x1, + x2, + y1, + y2, + epsabs=1e-200, + epsrel=1e-08, + limit=1000, + workers=1, + quadrature="gk21", +): + """ + >>> alpha = np.linspace(0.0, 2.0, num=30) + >>> np.round(quadvec2(lambda x,y: x**alpha + y**alpha,0,1,1,2)[0],5) + 2.0 + >>> np.round(quadvec2(lambda x,y: 2*x**alpha + y**alpha,0,1,1,2)[-1],5) + 3.0 + """ + func2 = lambda y: integrate.quad_vec( + lambda x: func(x, y), + x1, + x2, + epsabs=epsabs, + epsrel=epsrel, + limit=limit, + quadrature=quadrature, + )[0] + res = integrate.quad_vec( + func2, y1, y2, epsabs=epsabs, epsrel=epsrel, limit=limit, quadrature=quadrature + ) + return res[0] def AvgEig(Phi, xtest): - n = Phi(xtest[0].view(1, -1)).size()[0] - A = torch.zeros(size=(n, n), dtype=torch.float64) - for x in xtest: - v = Phi(x.view(1, -1)).view(-1, 1) - A = A + v @ v.T - A = A / xtest.size()[0] - # import matplotlib.pyplot as plt - # plt.imshow(A) - # plt.colorbar() - # plt.show() - maxeig = 
torch.min(torch.symeig(A)[0]) - return maxeig + n = Phi(xtest[0].view(1, -1)).size()[0] + A = torch.zeros(size=(n, n), dtype=torch.float64) + for x in xtest: + v = Phi(x.view(1, -1)).view(-1, 1) + A = A + v @ v.T + A = A / xtest.size()[0] + # import matplotlib.pyplot as plt + # plt.imshow(A) + # plt.colorbar() + # plt.show() + maxeig = torch.min(torch.symeig(A)[0]) + return maxeig def volume_eig(Phi, xtest, alpha=0.5): - n = Phi(xtest[0].view(1, -1)).size()[0] - A = torch.zeros(size=(n, n), dtype=torch.float64) - for x in xtest: - v = Phi(x.view(1, -1)).view(-1, 1) - mineig = torch.min(torch.symeig(v @ v.T)[0]) - print(mineig) - vol = 0 - return vol + n = Phi(xtest[0].view(1, -1)).size()[0] + A = torch.zeros(size=(n, n), dtype=torch.float64) + for x in xtest: + v = Phi(x.view(1, -1)).view(-1, 1) + mineig = torch.min(torch.symeig(v @ v.T)[0]) + print(mineig) + vol = 0 + return vol def chebyschev_nodes(n, d=1, L_infinity_ball=1): - nodes, w = np.polynomial.chebyshev.chebgauss(n) - arrays = [nodes.reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - return xtest + nodes, w = np.polynomial.chebyshev.chebgauss(n) + arrays = [nodes.reshape(n, 1) for i in range(d)] + xtest = cartesian(arrays) + return xtest if __name__ == "__main__": - pass + pass diff --git a/stpy/helpers/scores.py b/stpy/helpers/scores.py index 133db9c..af1cf97 100644 --- a/stpy/helpers/scores.py +++ b/stpy/helpers/scores.py @@ -1,4 +1,5 @@ import torch -def r_score_std(y_true, y_pred, std, alpha = 1.): - return 1 - torch.mean((y_true - y_pred)**2)/(alpha*std**2) \ No newline at end of file + +def r_score_std(y_true, y_pred, std, alpha=1.0): + return 1 - torch.mean((y_true - y_pred) ** 2) / (alpha * std**2) diff --git a/stpy/helpers/transformations.py b/stpy/helpers/transformations.py index 6a77ee1..2ea4eb1 100644 --- a/stpy/helpers/transformations.py +++ b/stpy/helpers/transformations.py @@ -5,40 +5,48 @@ def transform(X, low=-1, high=1, functions=True, offsets=None): - n, d = X.size() - Y = X.clone() - transforms = [] - inv_transforms = [] - - for i in range(d): - - if offsets is None: - xmin = torch.min(X[:, i]).clone().numpy() - xmax = torch.max(X[:, i]).clone().numpy() - else: - xmin = offsets[i][0] - xmax = offsets[i][1] - - k = copy.copy(float((xmin - xmax) / ((low - high)))) - q = copy.copy(float(xmin - low * k)) - - k2 = copy.copy(float((low - high) / (xmin - xmax))) - q2 = copy.copy(float(high - xmax * k2)) - - inv_transform = lambda a, k=k, q=q: k * a + q - transform = lambda a, k2=k2, q2=q2: k2 * a + q2 - - transforms.append(copy.copy(transform)) - inv_transforms.append(copy.copy(inv_transform)) - - Y[:, i] = torch.from_numpy(np.apply_along_axis(transform, 0, X[:, i].numpy())) - - trans = lambda Z: torch.stack( - [torch.from_numpy(np.apply_along_axis(transforms[i], 0, Z[:, i].numpy())) for i in range(d)]).T - inv_trans = lambda Y: torch.stack( - [torch.from_numpy(np.apply_along_axis(inv_transforms[i], 0, Y[:, i].numpy())) for i in range(d)]).T - - if functions == True: - return Y, trans, inv_trans, transforms, inv_transforms - else: - return Y + n, d = X.size() + Y = X.clone() + transforms = [] + inv_transforms = [] + + for i in range(d): + + if offsets is None: + xmin = torch.min(X[:, i]).clone().numpy() + xmax = torch.max(X[:, i]).clone().numpy() + else: + xmin = offsets[i][0] + xmax = offsets[i][1] + + k = copy.copy(float((xmin - xmax) / ((low - high)))) + q = copy.copy(float(xmin - low * k)) + + k2 = copy.copy(float((low - high) / (xmin - xmax))) + q2 = copy.copy(float(high - xmax * k2)) + + 
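        # Per-column affine rescaling: `transform` maps the data range
        # [xmin, xmax] to [low, high] via x -> k2 * x + q2, and `inv_transform`
        # maps back via y -> k * y + q.  Worked example with assumed values
        # xmin = 0, xmax = 10, low = -1, high = 1: k2 = 0.2, q2 = -1, so
        # 0 -> -1, 5 -> 0, 10 -> 1.
        # The k=k, q=q (and k2=k2, q2=q2) defaults below bind the current
        # column's coefficients into each lambda, sidestepping late binding of
        # closure variables across loop iterations.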
inv_transform = lambda a, k=k, q=q: k * a + q + transform = lambda a, k2=k2, q2=q2: k2 * a + q2 + + transforms.append(copy.copy(transform)) + inv_transforms.append(copy.copy(inv_transform)) + + Y[:, i] = torch.from_numpy(np.apply_along_axis(transform, 0, X[:, i].numpy())) + + trans = lambda Z: torch.stack( + [ + torch.from_numpy(np.apply_along_axis(transforms[i], 0, Z[:, i].numpy())) + for i in range(d) + ] + ).T + inv_trans = lambda Y: torch.stack( + [ + torch.from_numpy(np.apply_along_axis(inv_transforms[i], 0, Y[:, i].numpy())) + for i in range(d) + ] + ).T + + if functions == True: + return Y, trans, inv_trans, transforms, inv_transforms + else: + return Y diff --git a/stpy/helpers/wavelets.py b/stpy/helpers/wavelets.py index 9a378af..5872fde 100644 --- a/stpy/helpers/wavelets.py +++ b/stpy/helpers/wavelets.py @@ -1,26 +1,26 @@ from mpmath import * phi = lambda x: (0 <= x < 1) # scaling fct -psi = lambda x: (0 <= x < .5) - (.5 <= x < 1) # wavelet fct -phi_j_k = lambda x, j, k: 2 ** (j / 2) * phi(2 ** j * x - k) -psi_j_k = lambda x, j, k: 2 ** (j / 2) * psi(2 ** j * x - k) +psi = lambda x: (0 <= x < 0.5) - (0.5 <= x < 1) # wavelet fct +phi_j_k = lambda x, j, k: 2 ** (j / 2) * phi(2**j * x - k) +psi_j_k = lambda x, j, k: 2 ** (j / 2) * psi(2**j * x - k) def haar(f, interval, level): - c0 = quadgl(lambda t: f(t) * phi_j_k(t, 0, 0), interval) + c0 = quadgl(lambda t: f(t) * phi_j_k(t, 0, 0), interval) - coef = [] - for j in xrange(0, level): - for k in xrange(0, 2 ** j): - djk = quadgl(lambda t: f(t) * psi_j_k(t, j, k), interval) - coef.append((j, k, djk)) + coef = [] + for j in xrange(0, level): + for k in xrange(0, 2**j): + djk = quadgl(lambda t: f(t) * psi_j_k(t, j, k), interval) + coef.append((j, k, djk)) - return c0, coef + return c0, coef def haarval(haar_coef, x): - c0, coef = haar_coef - s = c0 * phi_j_k(x, 0, 0) - for j, k, djk in coef: - s += djk * psi_j_k(x, j, k) - return s + c0, coef = haar_coef + s = c0 * phi_j_k(x, 0, 0) + for j, k, djk in coef: + s += djk * psi_j_k(x, j, k) + return s diff --git a/stpy/kernel_functions/additive_decorator.py b/stpy/kernel_functions/additive_decorator.py index 718ef62..a477f67 100644 --- a/stpy/kernel_functions/additive_decorator.py +++ b/stpy/kernel_functions/additive_decorator.py @@ -1,5 +1,6 @@ def additive(func): def wrapper(): - func() - return wrapper \ No newline at end of file + func() + + return wrapper diff --git a/stpy/kernel_functions/ard_kernel.py b/stpy/kernel_functions/ard_kernel.py index 353cdc7..21a1ba7 100644 --- a/stpy/kernel_functions/ard_kernel.py +++ b/stpy/kernel_functions/ard_kernel.py @@ -3,91 +3,91 @@ def ard_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["ard_gamma", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["ard_gamma", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] + a = a[:, p.group] + b = b[:, p.group] - D = torch.diag(1. 
/ (p.ard_gamma[p.group])) + D = torch.diag(1.0 / (p.ard_gamma[p.group])) - a = torch.mm(a, D) - b = torch.mm(b, D) + a = torch.mm(a, D) + b = torch.mm(b, D) - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) - product = torch.mm(b, torch.t(a)) - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return p.kappa * res + product = torch.mm(b, torch.t(a)) + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return p.kappa * res def ard_kernel_diag(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["ard_gamma", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["ard_gamma", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] + a = a[:, p.group] + b = b[:, p.group] - D = torch.diag(1. / (p.ard_gamma[p.group])) - a = torch.mm(a, D) - b = torch.mm(b, D) - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) + D = torch.diag(1.0 / (p.ard_gamma[p.group])) + a = torch.mm(a, D) + b = torch.mm(b, D) + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) - product = torch.mm(b, torch.t(a)) - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return p.kappa * res + product = torch.mm(b, torch.t(a)) + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return p.kappa * res def ard_per_group_kernel_additive(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'groups' in kwargs.keys(): - groups = kwargs['groups'] - else: - groups = self.groups - - if 'ard_per_group' in kwargs.keys(): - ard_per_group = kwargs['ard_per_group'] - else: - raise AssertionError("This kernel requires 'ard_per_group' initial parameters") - - (n, z) = tuple(a.size()) - (q, m) = tuple(b.size()) - - r = torch.zeros(size=(q, n), dtype=torch.float64) - groups_index = 0 - - for group_add in groups: - kwargs['group'] = group_add - - size_group = len(group_add) - # use per group lenghtscale - # kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group] - gamma = ard_per_group[groups_index:groups_index + size_group] - groups_index += size_group - - ax = a[:, group_add] - bx = b[:, group_add] - D = torch.diag(1. 
/ (gamma)) - ax = torch.mm(ax, D) - bx = torch.mm(bx, D) - normx = torch.sum(ax ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(bx ** 2, dim=1).reshape(-1, 1) - product = torch.mm(bx, torch.t(ax)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - r = r + res - - r = r / float(len(groups)) - return kappa * r \ No newline at end of file + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "groups" in kwargs.keys(): + groups = kwargs["groups"] + else: + groups = self.groups + + if "ard_per_group" in kwargs.keys(): + ard_per_group = kwargs["ard_per_group"] + else: + raise AssertionError("This kernel requires 'ard_per_group' initial parameters") + + (n, z) = tuple(a.size()) + (q, m) = tuple(b.size()) + + r = torch.zeros(size=(q, n), dtype=torch.float64) + groups_index = 0 + + for group_add in groups: + kwargs["group"] = group_add + + size_group = len(group_add) + # use per group lenghtscale + # kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group] + gamma = ard_per_group[groups_index : groups_index + size_group] + groups_index += size_group + + ax = a[:, group_add] + bx = b[:, group_add] + D = torch.diag(1.0 / (gamma)) + ax = torch.mm(ax, D) + bx = torch.mm(bx, D) + normx = torch.sum(ax**2, dim=1).reshape(-1, 1) + normy = torch.sum(bx**2, dim=1).reshape(-1, 1) + product = torch.mm(bx, torch.t(ax)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + r = r + res + + r = r / float(len(groups)) + return kappa * r diff --git a/stpy/kernel_functions/covar_kernel.py b/stpy/kernel_functions/covar_kernel.py index 070cdc3..2ebdecc 100644 --- a/stpy/kernel_functions/covar_kernel.py +++ b/stpy/kernel_functions/covar_kernel.py @@ -1,20 +1,21 @@ import torch from stpy.kernel_functions.kernel_params import KernelParams + def covar_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["cov", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["cov", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] - a = torch.mm(a, p.cov) - b = torch.mm(b, p.cov) + a = a[:, p.group] + b = b[:, p.group] + a = torch.mm(a, p.cov) + b = torch.mm(b, p.cov) - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) - product = torch.mm(b, torch.t(a)) + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) + product = torch.mm(b, torch.t(a)) - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return p.kappa * res \ No newline at end of file + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return p.kappa * res diff --git a/stpy/kernel_functions/custom_map_kernel.py b/stpy/kernel_functions/custom_map_kernel.py index 62cc068..0e2ddea 100644 --- a/stpy/kernel_functions/custom_map_kernel.py +++ b/stpy/kernel_functions/custom_map_kernel.py @@ -1,14 +1,15 @@ from stpy.kernel_functions.kernel_params import KernelParams from stpy.kernel_functions.linear_kernel import linear_kernel + def custom_map_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["map", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["map", "kappa", "group"]) - a = a[:, p.group] - b = b[:, 
p.group] + a = a[:, p.group] + b = b[:, p.group] - if map is not None: - return p.kappa * linear_kernel(torch.t(p.map(a)), torch.t(p.map(b))).detach() - else: - return p.kappa * linear_kernel(a, b) \ No newline at end of file + if map is not None: + return p.kappa * linear_kernel(torch.t(p.map(a)), torch.t(p.map(b))).detach() + else: + return p.kappa * linear_kernel(a, b) diff --git a/stpy/kernel_functions/gibbs_custom_kernel.py b/stpy/kernel_functions/gibbs_custom_kernel.py index d3f3a37..dd6b153 100644 --- a/stpy/kernel_functions/gibbs_custom_kernel.py +++ b/stpy/kernel_functions/gibbs_custom_kernel.py @@ -1,23 +1,24 @@ from stpy.kernel_functions.kernel_params import KernelParams import torch + def gibbs_custom_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["gamma_fun", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["gamma_fun", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) + a = a[:, p.group] + b = b[:, p.group] + # print (a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy - lengthscales = p.gamma_fun(a, b) + lengthscales = p.gamma_fun(a, b) - arg = (-0.5 / lengthscales) * sqdist - res = torch.exp(arg) - return p.kappa * res \ No newline at end of file + arg = (-0.5 / lengthscales) * sqdist + res = torch.exp(arg) + return p.kappa * res diff --git a/stpy/kernel_functions/gibbs_kernel.py b/stpy/kernel_functions/gibbs_kernel.py index dbb4dc6..c9d9eca 100644 --- a/stpy/kernel_functions/gibbs_kernel.py +++ b/stpy/kernel_functions/gibbs_kernel.py @@ -1,24 +1,25 @@ import torch from stpy.kernel_functions.kernel_params import KernelParams + def gibbs_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["gamma_fun", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["gamma_fun", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) + a = a[:, p.group] + b = b[:, p.group] + # print (a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy - lengthscales = (p.gamma_fun(a) ** 2 + p.gamma_fun(b).T ** 2) + lengthscales = p.gamma_fun(a) ** 2 + p.gamma_fun(b).T ** 2 - print(lengthscales) + print(lengthscales) - arg = (-0.5 / lengthscales) * sqdist - res = torch.exp(arg) - return p.kappa * res \ No newline at end of file + arg = (-0.5 / lengthscales) * sqdist + res = torch.exp(arg) + return p.kappa * res diff --git a/stpy/kernel_functions/kernel_params.py b/stpy/kernel_functions/kernel_params.py index 047091b..9e9f766 100644 --- 
a/stpy/kernel_functions/kernel_params.py +++ b/stpy/kernel_functions/kernel_params.py @@ -1,11 +1,10 @@ +class KernelParams: -class KernelParams(): + def __init__(self, param_dict): + for key in param_dict: + setattr(self, key, param_dict[key]) - def __init__(self, param_dict): - for key in param_dict: - setattr(self, key, param_dict[key]) - - def assert_existence(self, names): - for name in names: - if not hasattr(self, name): - raise AttributeError("Missing attribute of the kernel %s" % str(name)) + def assert_existence(self, names): + for name in names: + if not hasattr(self, name): + raise AttributeError("Missing attribute of the kernel %s" % str(name)) diff --git a/stpy/kernel_functions/laplace_kernel.py b/stpy/kernel_functions/laplace_kernel.py index a1f1ce0..ffa34ee 100644 --- a/stpy/kernel_functions/laplace_kernel.py +++ b/stpy/kernel_functions/laplace_kernel.py @@ -3,12 +3,13 @@ from sklearn.metrics.pairwise import check_pairwise_arrays, manhattan_distances from stpy.kernel_functions.kernel_params import KernelParams + def laplace_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["gamma", "kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["gamma", "kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] - K = - manhattan_distances(a, b) / p.gamma ** 2 - K = np.exp(K) # exponentiate K in-place - return p.kappa * torch.from_numpy(K).T \ No newline at end of file + a = a[:, p.group] + b = b[:, p.group] + K = -manhattan_distances(a, b) / p.gamma**2 + K = np.exp(K) # exponentiate K in-place + return p.kappa * torch.from_numpy(K).T diff --git a/stpy/kernel_functions/linear_kernel.py b/stpy/kernel_functions/linear_kernel.py index 93403c4..1c0603b 100644 --- a/stpy/kernel_functions/linear_kernel.py +++ b/stpy/kernel_functions/linear_kernel.py @@ -1,15 +1,16 @@ from stpy.kernel_functions.kernel_params import KernelParams + def linear_kernel(a, b, **kwargs): - """ - linear kernl - :param a: - :param b: - :param kwargs: - :return: - """ - p = KernelParams(kwargs) - p.assert_existence(["kappa", "group"]) - a = a[:, group] - b = b[:, group] - return kappa * (b @ a.T) \ No newline at end of file + """ + linear kernl + :param a: + :param b: + :param kwargs: + :return: + """ + p = KernelParams(kwargs) + p.assert_existence(["kappa", "group"]) + a = a[:, group] + b = b[:, group] + return kappa * (b @ a.T) diff --git a/stpy/kernel_functions/squared_exponential_kernel.py b/stpy/kernel_functions/squared_exponential_kernel.py index 0297a99..cb5ae7e 100644 --- a/stpy/kernel_functions/squared_exponential_kernel.py +++ b/stpy/kernel_functions/squared_exponential_kernel.py @@ -2,37 +2,39 @@ import torch from stpy.kernel_functions.kernel_params import KernelParams + def squared_exponential_kernel(a, b, **kwargs): - """ - - :param a: - :param b: - :param kwargs: must include gamma, kappa, group - :return: - """ - p = KernelParams(kwargs) - p.assert_existence(["gamma", "kappa", "group"]) - - a = a[:, p.group] - b = b[:, p.group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) - - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = (-0.5 / (p.gamma * p.gamma)) * sqdist - res = torch.exp(arg) - return p.kappa * res - -def squared_exponential_kernel_diag(a,b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["gamma", "kappa", "group"]) - - a = a[:, p.group] - 
b = b[:, p.group] - sqdist = (a-b)**2 - arg = (-0.5 / (p.gamma * p.gamma)) * sqdist - res = torch.exp(arg) - return p.kappa * res \ No newline at end of file + """ + + :param a: + :param b: + :param kwargs: must include gamma, kappa, group + :return: + """ + p = KernelParams(kwargs) + p.assert_existence(["gamma", "kappa", "group"]) + + a = a[:, p.group] + b = b[:, p.group] + # print (a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = (-0.5 / (p.gamma * p.gamma)) * sqdist + res = torch.exp(arg) + return p.kappa * res + + +def squared_exponential_kernel_diag(a, b, **kwargs): + p = KernelParams(kwargs) + p.assert_existence(["gamma", "kappa", "group"]) + + a = a[:, p.group] + b = b[:, p.group] + sqdist = (a - b) ** 2 + arg = (-0.5 / (p.gamma * p.gamma)) * sqdist + res = torch.exp(arg) + return p.kappa * res diff --git a/stpy/kernel_functions/step_kernel.py b/stpy/kernel_functions/step_kernel.py index 0ddfa78..0a643d4 100644 --- a/stpy/kernel_functions/step_kernel.py +++ b/stpy/kernel_functions/step_kernel.py @@ -1,20 +1,21 @@ from stpy.kernel_functions.kernel_params import KernelParams import torch + def step_kernel(a, b, **kwargs): - p = KernelParams(kwargs) - p.assert_existence(["kappa", "group"]) + p = KernelParams(kwargs) + p.assert_existence(["kappa", "group"]) - a = a[:, p.group] - b = b[:, p.group] + a = a[:, p.group] + b = b[:, p.group] - n, d = a.size() - m, d = b.size() + n, d = a.size() + m, d = b.size() - K = torch.zeros(size=(n, m)).double() + K = torch.zeros(size=(n, m)).double() - for i in range(n): - for j in range(m): - K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :]) + for i in range(n): + for j in range(m): + K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :]) - return p.kappa * K.T \ No newline at end of file + return p.kappa * K.T diff --git a/stpy/kernels.py b/stpy/kernels.py index c05e32b..1cacbb0 100755 --- a/stpy/kernels.py +++ b/stpy/kernels.py @@ -5,1119 +5,1167 @@ from scipy.spatial.distance import cdist from scipy.special import kv from sklearn.metrics.pairwise import check_pairwise_arrays, manhattan_distances -from stpy.kernel_functions.squared_exponential_kernel import squared_exponential_kernel_diag +from stpy.kernel_functions.squared_exponential_kernel import ( + squared_exponential_kernel_diag, +) + class KernelFunction: - def __init__(self, kernel_function=None, kernel_name="squared_exponential", \ - freq=None, groups=None, d=1, gamma=1, ard_gamma=None, nu=1.5, kappa=1, map=None, power=2, - cov=None, params=None, group=None, offset = 0. 
): - - if kernel_function is not None: - self.kernel_function = kernel_function - self.optkernel = "custom" - self.kappa = kappa - self.offset = offset - if params is None: - self.params = {'kappa': self.kappa} - else: - self.params = params - self.initial_params = self.params - - if group is None: - self.group = [i for i in range(d)] - else: - self.group = group - self.d = d - else: - self.offset = offset - self.optkernel = kernel_name - self.gamma = gamma - if ard_gamma is None: - self.ard_gamma = torch.ones(d).double() - else: - try: - self.ard_gamma = torch.Tensor([ard_gamma]).double() - except: - self.ard_gamma = ard_gamma - self.power = power - self.v = nu - - if params is not None: - self.initial_params = params - else: - self.initial_params = {'kappa':kappa} - - if cov is None: - self.cov = torch.eye(d).double() - else: - self.cov = cov - - if group is None: - self.group = [i for i in range(d)] - else: - self.group = group - - self.map = map - self.groups = groups - self.kappa = kappa - self.freq = freq - self.d = d - self.add = False - - self.kernel_function_list = [self.get_kernel_internal()] - self.kernel_diag_function_list = [self.get_kernel_internal(diag = True)] - self.optkernel_list = [self.optkernel] - self.params_dict = {'0': self.params} - self.kernel_items = 1 - - self.operations = ["-"] - - def __combine__(self, second_kernel_object): - self.kernel_function_list = self.kernel_function_list + second_kernel_object.kernel_function_list - self.optkernel_list = self.optkernel_list + second_kernel_object.optkernel_list - self.operations = self.operations + second_kernel_object.operations[1:] - for key, value in second_kernel_object.params_dict.items(): - self.params_dict[str(self.kernel_items)] = value - self.kernel_items += 1 - - def __add__(self, second_kernel_object): - self.__combine__(second_kernel_object) - diff = len(set(second_kernel_object.group) - set(self.group)) - self.d += diff - self.operations.append("+") - return self - - def __mul__(self, second_kernel_object): - self.__combine__(second_kernel_object) - self.operations.append("*") - return self - - def description(self): - desc = "Kernel description:" - for index in range(0, self.kernel_items, 1): - desc = desc + "\n\n\tkernel: " + self.optkernel_list[index] - desc = desc + "\n\toperation: " + self.operations[index] - desc = desc + "\n\t" + "\n\t".join( - ["{0}={1}".format(key, value) for key, value in self.params_dict[str(index)].items()]) - return desc - - def add_groups(self, dict): - for a in self.params_dict.keys(): - if a not in dict.keys(): - dict[a] = {} - dict[a]['group'] = self.params_dict[a]['group'] - return dict - - def kernel_diag(self, a,b, **kwargs): - if len(kwargs) > 0: - # params_dict = list(kwargs) - # we need to send - params_dict = kwargs - self.add_groups(params_dict) - else: - params_dict = self.params_dict - - for i in range(0, len(self.kernel_function_list), 1): - k = self.kernel_diag_function_list[i] - if str(i) in params_dict.keys(): - arg = params_dict[str(i)] - else: - arg = {} - if self.operations[i] == "+": - output = output + k(a, b, **arg) - elif self.operations[i] == "*": - output = output * k(a, b, **arg) - else: - output = k(a, b, **arg) - - return output - - def kernel(self, a, b, **kwargs): - - if len(kwargs) > 0: - # params_dict = list(kwargs) - # we need to send - params_dict = kwargs - self.add_groups(params_dict) - else: - params_dict = self.params_dict - - for i in range(0, len(self.kernel_function_list), 1): - k = self.kernel_function_list[i] - if str(i) in 
params_dict.keys(): - arg = params_dict[str(i)] - else: - arg = {} - if self.operations[i] == "+": - output = output + k(a, b, **arg) - elif self.operations[i] == "*": - output = output * k(a, b, **arg) - else: - output = k(a, b, **arg) - - return output - - def get_param_refs(self): - return self.params_dict - - def get_kernel(self): - return self.kernel - - def get_kernel_internal(self, diag = False): - - self.params = {**self.initial_params, 'kappa': self.kappa, 'group': self.group, 'offset': self.offset} - - if self.optkernel == "squared_exponential": - self.params = dict(**self.params, **{'gamma': self.gamma}) - if diag: - return squared_exponential_kernel_diag - else: - return self.squared_exponential_kernel - - elif self.optkernel == "ard" and (self.groups is None): - self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma}) - if diag: - return self.ard_kernel - else: - return self.ard_kernel_diag - - - elif self.optkernel == "linear": - return self.linear_kernel - - elif self.optkernel == "laplace": - self.params = dict(**self.params, **{'gamma': self.gamma}) - return self.laplace_kernel - - elif self.optkernel == "modified_matern": - self.params = dict(**self.params, **{'gamma': self.gamma, 'nu': self.v}) - return self.modified_matern_kernel - - elif self.optkernel == "custom": - return self.kernel_function - - elif self.optkernel == "tanh": - return self.tanh_kernel - - elif self.optkernel == 'step': - return self.step_kernel - - elif self.optkernel == "angsim": - return self.angsim_kernel - - elif self.optkernel == "matern": - self.params = dict(**self.params, **{'gamma': self.gamma, 'nu': self.v}) - return self.matern_kernel - - elif self.optkernel == "ard_matern": - self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma, 'nu': self.v}) - - if diag: - return self.ard_matern_kernel_diag - else: - return self.ard_matern_kernel - - elif self.optkernel == "full_covariance_se": - self.params = dict(**self.params, **{'cov': self.cov}) - return self.covar_kernel - - elif self.optkernel == "full_covariance_matern": - self.params = dict(**self.params, **{'cov': self.cov, 'nu': self.v}) - return self.covar_kernel_matern - - elif (self.optkernel == "polynomial") and (self.groups is None): - self.params = dict(**self.params, **{'degree': self.power}) - return self.polynomial_kernel - - elif (self.optkernel == "polynomial") and (self.groups is not None): - self.params = dict(**self.params, **{'degree': self.power, 'groups': self.groups}) - return self.polynomial_additive_kernel - - elif self.optkernel == "ard" and (self.groups is not None): - self.params = dict(**self.params, **{'ard_gamma': self.ard_gamma, 'groups': self.groups}) - return self.ard_kernel_additive - - elif self.optkernel == "squared_exponential_per_group" and (self.groups is not None): - self.params = dict(**self.params, **{'groups': self.groups}) - return self.squared_exponential_per_group_kernel_additive - - elif self.optkernel == "ard_per_group" and (self.groups is not None): - self.params = dict(**self.params, **{'groups': self.groups}) - return self.ard_per_group_kernel_additive - - elif self.optkernel == "gibbs": - self.params = dict(**self.params, **{'groups': self.groups}) - return self.gibbs_kernel - - elif self.optkernel == "gibbs_custom": - self.params = dict(**self.params, **{'groups': self.groups}) - return self.gibbs_custom_kernel - - elif self.optkernel == "random_map": - return self.random_map_kernel - - else: - raise AssertionError("Kernel not implemented.") - - def embed(self, x): - if 
self.optkernel == "linear": - return x - else: - raise AttributeError("This type of kernel does not support a finite dimensional embedding") - - def get_basis_size(self): - if self.optkernel == "linear": - return self.d - else: - raise AttributeError("This type of kernel does not support a finite dimensional embedding") - - def step_kernel(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - n, d = a.size() - m, d = b.size() - - K = torch.zeros(size=(n, m)).double() - - for i in range(n): - for j in range(m): - K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :]) - - return kappa * K.T - - def linear_kernel(self, a, b, **kwargs): - """ - GP linear kernel - """ - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - if 'offset' in kwargs.keys(): - offset = kwargs['offset'] - else: - offset = self.offset - a = a[:, group] - b = b[:, group] - return kappa * (b @ a.T) + offset - - def custom_map_kernel(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - - if 'map' in kwargs.keys(): - map = kwargs['map'] - else: - map = self.map - - a = a[:, group] - b = b[:, group] - - if map is not None: - return kappa * self.linear_kernel(torch.t(self.map.map(a)), torch.t(self.map.map(b))).detach() - else: - return kappa * self.linear_kernel(a, b) - - def laplace_kernel(self, a, b, **kwargs): - if 'gamma' in kwargs.keys(): - gamma = kwargs['gamma'] - else: - gamma = self.gamma - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - K = - manhattan_distances(a, b) / gamma ** 2 - K = np.exp(K) # exponentiate K in-place - return kappa * torch.from_numpy(K).T - - def squared_exponential_kernel(self, a, b, **kwargs): - """ - GP squared exponential kernel - """ - if 'gamma' in kwargs.keys(): - gamma = kwargs['gamma'] - else: - gamma = self.gamma - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) - - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = (-0.5 / (gamma * gamma)) * sqdist - res = torch.exp(arg) - return kappa * res - - def gibbs_custom_kernel(self, a, b, **kwargs): - if 'gamma_fun' in kwargs.keys(): - gamma_fun = kwargs['gamma_fun'] - else: - raise AttributeError("Missing gamma_fun in Gibbs kernel definition.") - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) - - product = torch.mm(b, torch.t(a)) - # 
sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - - lengthscales = gamma_fun(a, b) - - arg = (-0.5 / lengthscales) * sqdist - res = torch.exp(arg) - return kappa * res - - def gibbs_kernel(self, a, b, **kwargs): - if 'gamma_fun' in kwargs.keys(): - gamma_fun = kwargs['gamma_fun'] - else: - raise AttributeError("Missing gamma_fun in Gibbs kernel definition.") - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - # print (a.shape, b.shape) - normx = torch.sum(a ** 2, dim=1).view(-1, 1) - normy = torch.sum(b ** 2, dim=1).view(-1, 1) - - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - - lengthscales = (gamma_fun(a) ** 2 + gamma_fun(b).T ** 2) - - print(lengthscales) - - arg = (-0.5 / lengthscales) * sqdist - res = torch.exp(arg) - return kappa * res - - def covar_kernel(self, a, b, **kwargs): - """ - :param a: - :param b: - :param cov: square-root of the covariance matrix - :return: - """ - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'cov' in kwargs.keys(): - cov = kwargs['cov'] - else: - cov = self.cov - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - a = torch.mm(a, cov) - b = torch.mm(b, cov) - - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) - - product = torch.mm(b, torch.t(a)) - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return kappa * res - - - def covar_kernel_matern(self, a, b, **kwargs): - """ - :param a: - :param b: - :param cov: square-root of the covariance matrix - :return: - """ - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'cov' in kwargs.keys(): - cov = kwargs['cov'] - else: - cov = self.cov - if 'v' in kwargs.keys(): - v = kwargs['v'] - else: - v = self.v - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - a = torch.mm(a, cov) - b = torch.mm(b, cov) - - dists = torch.cdist(a, b, p=2).T - - if v == 0.5: - K = torch.exp(-dists) - elif v == 1.5: - K = dists * np.sqrt(3) - K = (1. + K) * torch.exp(-K) - elif v == 2.5: - K = dists * np.sqrt(5) - K = (1. + K + K ** 2 / 3.0) * torch.exp(-K) - else: # general case; expensive to evaluate - K = dists - K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan - tmp = (np.sqrt(2 * v) * K) - K.fill((2 ** (1. - v)) / math.gamma(v)) - K *= tmp ** v - K *= kv(v, tmp) - return kappa * K - - - def ard_kernel(self, a, b, **kwargs): - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'ard_gamma' in kwargs.keys(): - gamma = kwargs['ard_gamma'] - else: - gamma = self.ard_gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - D = torch.diag(1. 
/ (gamma[group])) - a = torch.mm(a, D) - b = torch.mm(b, D) - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) - - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return kappa * res - - def ard_kernel_diag(self, a, b, **kwargs): - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'ard_gamma' in kwargs.keys(): - gamma = kwargs['ard_gamma'] - else: - gamma = self.ard_gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - D = torch.diag(1. / (gamma[group])) - a = torch.mm(a, D) - b = torch.mm(b, D) - normx = torch.sum(a ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(b ** 2, dim=1).reshape(-1, 1) - - product = torch.mm(b, torch.t(a)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - return kappa * res - - - - def ard_per_group_kernel_additive(self,a,b,**kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'groups' in kwargs.keys(): - groups = kwargs['groups'] - else: - groups = self.groups - - if 'ard_per_group' in kwargs.keys(): - ard_per_group = kwargs['ard_per_group'] - else: - raise AssertionError("This kernel requires 'ard_per_group' initial parameters") - - (n, z) = tuple(a.size()) - (q, m) = tuple(b.size()) - - r = torch.zeros(size=(q, n), dtype=torch.float64) - groups_index = 0 - - for group_add in groups: - kwargs['group'] = group_add - - size_group = len(group_add) - # use per group lenghtscale - #kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group] - gamma = ard_per_group[groups_index:groups_index+size_group] - groups_index +=size_group - - ax = a[:, group_add] - bx = b[:, group_add] - D = torch.diag(1. 
/ (gamma)) - ax = torch.mm(ax, D) - bx = torch.mm(bx, D) - normx = torch.sum(ax ** 2, dim=1).reshape(-1, 1) - normy = torch.sum(bx ** 2, dim=1).reshape(-1, 1) - product = torch.mm(bx, torch.t(ax)) - # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product - sqdist = -2 * product + torch.t(normx) + normy - arg = - 0.5 * sqdist - res = torch.exp(arg) - r = r + res - - r = r / float(len(groups)) - return kappa*r - - def squared_exponential_per_group_kernel_additive(self,a,b,**kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'groups' in kwargs.keys(): - groups = kwargs['groups'] - else: - groups = self.groups - - if 'gamma_per_group' in kwargs.keys(): - gamma_per_group = kwargs['gamma_per_group'] - else: - raise AssertionError("This kernel requires 'gamma_per_group' initial parameters") - - (n, z) = tuple(a.size()) - (q, m) = tuple(b.size()) - - r = torch.zeros(size=(q, n), dtype=torch.float64) - - for group_add, gamma in zip(groups,gamma_per_group): - kwargs['group'] = group_add - - # use per group lenghtscale - kwargs['gamma'] = gamma - - r = r + self.squared_exponential_kernel(a, b, **kwargs) - - r = kappa * r / float(len(groups)) - return r - - def ard_kernel_additive(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'groups' in kwargs.keys(): - groups = kwargs['groups'] - else: - groups = self.groups - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - (n, z) = tuple(a.size()) - (q, m) = tuple(b.size()) - - r = torch.zeros(size=(q, n), dtype=torch.float64) - - for group_add in groups: - kwargs['group'] = group_add - r = r + self.ard_kernel(a, b, **kwargs) - - r = r / float(len(groups)) - return r - - def tanh_kernel(self, a, b, **kwargs): - """ - GP squared exponential kernel - """ - # print (a.shape, b.shape) - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - X, Y = check_pairwise_arrays(a.numpy(), b.numpy()) - K = manhattan_distances(a.numpy(), b.numpy()) - K = K.T - eps = 10e-10 - q = 3 - A = (np.tanh(K) ** q) / (eps + K ** q) - return kappa * torch.from_numpy(A) - - def angsim_kernel(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - return kappa * (2. 
/ np.pi) * np.arcsin((a.dot(b)) / (a.norm() * b.norm())) - - def polynomial_kernel(self, a, b, **kwargs): - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - if 'degree' in kwargs.keys(): - power = kwargs['degree'] - else: - power = self.power - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - K = (torch.mm(b, torch.t(a)) + 1) ** power - return kappa * K - - def polynomial_additive_kernel(self, a, b, **kwargs): - - if 'groups' in kwargs.keys(): - groups = kwargs['groups'] - else: - groups = self.groups - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - (n, z) = tuple(a.size()) - (q, m) = tuple(b.size()) - no_groups = float(len(groups)) - r = torch.zeros(size=(q, n), dtype=torch.float64) - for i, group in enumerate(groups): - z = self.polynomial_kernel(a[:, group], b[:, group], **kwargs) - r = r + z - r = r / no_groups - return r - - - def matern_kernel(self, a, b, **kwargs): - """ - :param a: matrices - :param b: matrices - :param gamma: smoothness - :param v: Bessel function type - :return: - """ - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'nu' in kwargs.keys(): - v = kwargs['nu'] - else: - v = self.v - - if 'gamma' in kwargs.keys(): - gamma = kwargs['gamma'] - else: - gamma = self.gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group].numpy() - b = b[:, group].numpy() - - dists = cdist(a / gamma, b / gamma, metric='euclidean').T - if v == 0.5: - K = np.exp(-dists) - elif v == 1.5: - K = dists * math.sqrt(3) - K = (1. + K) * np.exp(-K) - elif v == 2.5: - K = dists * math.sqrt(5) - K = (1. + K + K ** 2 / 3.0) * np.exp(-K) - else: # general case; expensive to evaluate - K = dists - K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan - tmp = (math.sqrt(2 * v) * K) - K.fill((2 ** (1. - v)) / math.gamma(v)) - K *= tmp ** v - K *= kv(v, tmp) - return kappa * torch.from_numpy(K) - - - def ard_matern_kernel_diag(self, a, b, **kwargs): - """ - :param a: matrices - :param b: matrices - :param gamma: smoothness - :param v: Bessel function type - :return: - """ - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'nu' in kwargs.keys(): - v = kwargs['nu'] - else: - v = self.v - - if 'ard_gamma' in kwargs.keys(): - ard_gamma = kwargs['ard_gamma'] - else: - ard_gamma = self.ard_gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - D = torch.diag(1. / (ard_gamma[group])) - a = torch.mm(a, D) - b = torch.mm(b, D) - - a = a[:, group] - b = b[:, group] - - #dists = torch.cdist(a , b , p = 2).T - dists = torch.sqrt(torch.sum((a - b)**2)) - - if v == 0.5: - K = torch.exp(-dists) - elif v == 1.5: - K = dists * np.sqrt(3) - K = (1. + K) * torch.exp(-K) - elif v == 2.5: - K = dists * np.sqrt(5) - K = (1. + K + K ** 2 / 3.0) * torch.exp(-K) - else: # general case; expensive to evaluate - K = dists - K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan - tmp = (np.sqrt(2 * v) * K) - K.fill((2 ** (1. 
- v)) / math.gamma(v)) - K *= tmp ** v - K *= kv(v, tmp) - return kappa * K - - def ard_matern_kernel(self, a, b, **kwargs): - """ - :param a: matrices - :param b: matrices - :param gamma: smoothness - :param v: Bessel function type - :return: - """ - - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'nu' in kwargs.keys(): - v = kwargs['nu'] - else: - v = self.v - - if 'ard_gamma' in kwargs.keys(): - ard_gamma = kwargs['ard_gamma'] - else: - ard_gamma = self.ard_gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - D = torch.diag(1. / (ard_gamma[group])) - a = torch.mm(a, D) - b = torch.mm(b, D) - - a = a[:, group] - b = b[:, group] - - dists = torch.cdist(a , b , p = 2).T - - if v == 0.5: - K = torch.exp(-dists) - elif v == 1.5: - K = dists * np.sqrt(3) - K = (1. + K) * torch.exp(-K) - elif v == 2.5: - K = dists * np.sqrt(5) - K = (1. + K + K ** 2 / 3.0) * torch.exp(-K) - else: # general case; expensive to evaluate - K = dists - K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan - tmp = (np.sqrt(2 * v) * K) - K.fill((2 ** (1. - v)) / math.gamma(v)) - K *= tmp ** v - K *= kv(v, tmp) - return kappa * K - - def modified_matern_kernel(self, X, Y, **kwargs): - """ - :param a: matrices - :param b: matrices - :param gamma: smoothness - :param v: Bessel function type - :return: - """ - if 'kappa' in kwargs.keys(): - kappa = kwargs['kappa'] - else: - kappa = self.kappa - - if 'nu' in kwargs.keys(): - v = kwargs['nu'] - else: - v = self.v - - if 'gamma' in kwargs.keys(): - gamma = kwargs['gamma'] - else: - gamma = self.gamma - - if 'group' in kwargs.keys(): - group = kwargs['group'] - else: - group = self.group - - a = a[:, group] - b = b[:, group] - - d = X.size()[1] - # Z = np.ones(shape = (X.shape[0],Y.shape[0])) - Z = torch.ones(size=(Y.size()[0], X.size()[0]), dtype=torch.float64) - for i in range(d): - a = X[:, i].view(-1, 1) - b = Y[:, i].view(-1, 1) - # dists = cdist(a/gamma,b/gamma,metric='cityblock').T - dists = cdist(a.numpy() / gamma, b.numpy() / gamma, metric='euclidean').T - # dists = manhattan_distances(a, b).T/ gamma - dists = torch.from_numpy(dists) - if v == 1: - K = torch.exp(-dists) - elif v == 2: - K = (1 + dists) * torch.exp(-dists) - elif v == 3: - K = (dists ** 2 + 3 * torch.abs(dists) + 3) * torch.exp(-dists) / 3. - elif v == 4: - K = (dists ** 3 + 6 * dists ** 2 + 15 * torch.abs(dists) + 15) * torch.exp(-dists) / 15. 
- else: - raise AssertionError("Kernel with nu = " + str(v) + "not implemented.") - Z = Z * K - return kappa * Z - - def spectral_kernel(self, a, b): - if self.freq is not None: - (n, d) = a.size() - (m, d) = b.size() - dist = torch.zeros(size=(n, m), dtype=torch.float64) - c = 0 - for x in a: - z = 0 - for y in b: - dist[c, z] = torch.sum(torch.cos(torch.mm(x.view(1, 1) - y.view(1, 1), self.freq))) - z = z + 1 - c = c + 1 - N = self.freq.size()[0] - return torch.t(dist) / N - else: - raise AssertionError("No frequencies passed") - - def wiener_kernel(self, a, b): - """ - Wiener process kernel - k(x,y) = min(x,y) - k(x,y) = \sum_i min(x_i,y_i) - """ - (n, d) = a.size() - (m, d) = b.size() - dist = torch.zeros(size=(n, m)) - # dist = 0.1*np.eye(max(n,m))[0:m,0:n] - c = 0 - for x in a: - z = 0 - for y in b: - print(x, y) - dist[c, z] = torch.from_numpy(np.sum(np.min(np.array([x, y]), axis=0))) - z = z + 1 - c = c + 1 - - # print (dist) - return dist.T - - def derivative_1(self, fixed, x): - """ - - """ - d = x.size()[1] - n = x.size()[0] - - size = fixed.size()[0] - - if self.optkernel == "squared_exponential": - k_original = self.squared_exponential_kernel(fixed, x) - second = fixed.unsqueeze(1) - x - second = second / self.gamma ** 2 - res = self.kappa * torch.einsum('ij,jik->ijk', k_original, second) - else: - raise AssertionError("Not implemented for this kernel") - - # result should be (n,d) - return res - - def derivative_2(self, fixed, x): - """ - - """ - d = x.size()[1] - n = x.size()[0] - - size = fixed.size()[0] - - if self.optkernel == "squared_exponential": - k_original = self.squared_exponential_kernel(fixed, x) - second = fixed.unsqueeze(1) - x - second = second / self.gamma ** 2 - second2 = torch.einsum('ijk,ijl->ijkl', second, second) - res1 = torch.einsum('ij,jikl->ijkl', k_original, second2) - - ones = torch.zeros(size=(size, n, d, d)) - for j in range(d): - ones[:, :, j, j] = 1. 
- ones = -ones / self.gamma ** 2 - res2 = torch.einsum('ij,jikl->ijkl', k_original, ones) - res = self.kappa * (res1 + res2) - # res = self.kappa * res2 - else: - raise AssertionError("Not implemented for this kernel") - - return res - - def square_dist(self, a, b): - if (a.shape == b.shape): - normx = np.sum(a ** 2, axis=1).reshape(-1, 1) - normy = np.sum(b ** 2, axis=1).reshape(-1, 1) - else: - normx = np.sum(a ** 2, axis=1).reshape(-1, 1) - normy = np.sum(b ** 2, axis=1).reshape(-1, 1) - - product = b.dot(a.T) - sqdist = np.tile(normx, b.shape[0]).T + np.tile(normy, a.shape[0]) - 2 * product - return sqdist + def __init__( + self, + kernel_function=None, + kernel_name="squared_exponential", + freq=None, + groups=None, + d=1, + gamma=1, + ard_gamma=None, + nu=1.5, + kappa=1, + map=None, + power=2, + cov=None, + params=None, + group=None, + offset=0.0, + ): + + if kernel_function is not None: + self.kernel_function = kernel_function + self.optkernel = "custom" + self.kappa = kappa + self.offset = offset + if params is None: + self.params = {"kappa": self.kappa} + else: + self.params = params + self.initial_params = self.params + + if group is None: + self.group = [i for i in range(d)] + else: + self.group = group + self.d = d + else: + self.offset = offset + self.optkernel = kernel_name + self.gamma = gamma + if ard_gamma is None: + self.ard_gamma = torch.ones(d).double() + else: + try: + self.ard_gamma = torch.Tensor([ard_gamma]).double() + except: + self.ard_gamma = ard_gamma + self.power = power + self.v = nu + + if params is not None: + self.initial_params = params + else: + self.initial_params = {"kappa": kappa} + + if cov is None: + self.cov = torch.eye(d).double() + else: + self.cov = cov + + if group is None: + self.group = [i for i in range(d)] + else: + self.group = group + + self.map = map + self.groups = groups + self.kappa = kappa + self.freq = freq + self.d = d + self.add = False + + self.kernel_function_list = [self.get_kernel_internal()] + self.kernel_diag_function_list = [self.get_kernel_internal(diag=True)] + self.optkernel_list = [self.optkernel] + self.params_dict = {"0": self.params} + self.kernel_items = 1 + + self.operations = ["-"] + + def __combine__(self, second_kernel_object): + self.kernel_function_list = ( + self.kernel_function_list + second_kernel_object.kernel_function_list + ) + self.optkernel_list = self.optkernel_list + second_kernel_object.optkernel_list + self.operations = self.operations + second_kernel_object.operations[1:] + for key, value in second_kernel_object.params_dict.items(): + self.params_dict[str(self.kernel_items)] = value + self.kernel_items += 1 + + def __add__(self, second_kernel_object): + self.__combine__(second_kernel_object) + diff = len(set(second_kernel_object.group) - set(self.group)) + self.d += diff + self.operations.append("+") + return self + + def __mul__(self, second_kernel_object): + self.__combine__(second_kernel_object) + self.operations.append("*") + return self + + def description(self): + desc = "Kernel description:" + for index in range(0, self.kernel_items, 1): + desc = desc + "\n\n\tkernel: " + self.optkernel_list[index] + desc = desc + "\n\toperation: " + self.operations[index] + desc = ( + desc + + "\n\t" + + "\n\t".join( + [ + "{0}={1}".format(key, value) + for key, value in self.params_dict[str(index)].items() + ] + ) + ) + return desc + + def add_groups(self, dict): + for a in self.params_dict.keys(): + if a not in dict.keys(): + dict[a] = {} + dict[a]["group"] = self.params_dict[a]["group"] + return dict + + 
def kernel_diag(self, a, b, **kwargs): + if len(kwargs) > 0: + # params_dict = list(kwargs) + # we need to send + params_dict = kwargs + self.add_groups(params_dict) + else: + params_dict = self.params_dict + + for i in range(0, len(self.kernel_function_list), 1): + k = self.kernel_diag_function_list[i] + if str(i) in params_dict.keys(): + arg = params_dict[str(i)] + else: + arg = {} + if self.operations[i] == "+": + output = output + k(a, b, **arg) + elif self.operations[i] == "*": + output = output * k(a, b, **arg) + else: + output = k(a, b, **arg) + + return output + + def kernel(self, a, b, **kwargs): + + if len(kwargs) > 0: + # params_dict = list(kwargs) + # we need to send + params_dict = kwargs + self.add_groups(params_dict) + else: + params_dict = self.params_dict + + for i in range(0, len(self.kernel_function_list), 1): + k = self.kernel_function_list[i] + if str(i) in params_dict.keys(): + arg = params_dict[str(i)] + else: + arg = {} + if self.operations[i] == "+": + output = output + k(a, b, **arg) + elif self.operations[i] == "*": + output = output * k(a, b, **arg) + else: + output = k(a, b, **arg) + + return output + + def get_param_refs(self): + return self.params_dict + + def get_kernel(self): + return self.kernel + + def get_kernel_internal(self, diag=False): + + self.params = { + **self.initial_params, + "kappa": self.kappa, + "group": self.group, + "offset": self.offset, + } + + if self.optkernel == "squared_exponential": + self.params = dict(**self.params, **{"gamma": self.gamma}) + if diag: + return squared_exponential_kernel_diag + else: + return self.squared_exponential_kernel + + elif self.optkernel == "ard" and (self.groups is None): + self.params = dict(**self.params, **{"ard_gamma": self.ard_gamma}) + if diag: + return self.ard_kernel + else: + return self.ard_kernel_diag + + elif self.optkernel == "linear": + return self.linear_kernel + + elif self.optkernel == "laplace": + self.params = dict(**self.params, **{"gamma": self.gamma}) + return self.laplace_kernel + + elif self.optkernel == "modified_matern": + self.params = dict(**self.params, **{"gamma": self.gamma, "nu": self.v}) + return self.modified_matern_kernel + + elif self.optkernel == "custom": + return self.kernel_function + + elif self.optkernel == "tanh": + return self.tanh_kernel + + elif self.optkernel == "step": + return self.step_kernel + + elif self.optkernel == "angsim": + return self.angsim_kernel + + elif self.optkernel == "matern": + self.params = dict(**self.params, **{"gamma": self.gamma, "nu": self.v}) + return self.matern_kernel + + elif self.optkernel == "ard_matern": + self.params = dict( + **self.params, **{"ard_gamma": self.ard_gamma, "nu": self.v} + ) + + if diag: + return self.ard_matern_kernel_diag + else: + return self.ard_matern_kernel + + elif self.optkernel == "full_covariance_se": + self.params = dict(**self.params, **{"cov": self.cov}) + return self.covar_kernel + + elif self.optkernel == "full_covariance_matern": + self.params = dict(**self.params, **{"cov": self.cov, "nu": self.v}) + return self.covar_kernel_matern + + elif (self.optkernel == "polynomial") and (self.groups is None): + self.params = dict(**self.params, **{"degree": self.power}) + return self.polynomial_kernel + + elif (self.optkernel == "polynomial") and (self.groups is not None): + self.params = dict( + **self.params, **{"degree": self.power, "groups": self.groups} + ) + return self.polynomial_additive_kernel + + elif self.optkernel == "ard" and (self.groups is not None): + self.params = dict( + 
**self.params, **{"ard_gamma": self.ard_gamma, "groups": self.groups} + ) + return self.ard_kernel_additive + + elif self.optkernel == "squared_exponential_per_group" and ( + self.groups is not None + ): + self.params = dict(**self.params, **{"groups": self.groups}) + return self.squared_exponential_per_group_kernel_additive + + elif self.optkernel == "ard_per_group" and (self.groups is not None): + self.params = dict(**self.params, **{"groups": self.groups}) + return self.ard_per_group_kernel_additive + + elif self.optkernel == "gibbs": + self.params = dict(**self.params, **{"groups": self.groups}) + return self.gibbs_kernel + + elif self.optkernel == "gibbs_custom": + self.params = dict(**self.params, **{"groups": self.groups}) + return self.gibbs_custom_kernel + + elif self.optkernel == "random_map": + return self.random_map_kernel + + else: + raise AssertionError("Kernel not implemented.") + + def embed(self, x): + if self.optkernel == "linear": + return x + else: + raise AttributeError( + "This type of kernel does not support a finite dimensional embedding" + ) + + def get_basis_size(self): + if self.optkernel == "linear": + return self.d + else: + raise AttributeError( + "This type of kernel does not support a finite dimensional embedding" + ) + + def step_kernel(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + n, d = a.size() + m, d = b.size() + + K = torch.zeros(size=(n, m)).double() + + for i in range(n): + for j in range(m): + K[i, j] = a[i, :] + b[j, :] - torch.abs(a[i, :] - b[j, :]) + + return kappa * K.T + + def linear_kernel(self, a, b, **kwargs): + """ + GP linear kernel + """ + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + if "offset" in kwargs.keys(): + offset = kwargs["offset"] + else: + offset = self.offset + a = a[:, group] + b = b[:, group] + return kappa * (b @ a.T) + offset + + def custom_map_kernel(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + if "map" in kwargs.keys(): + map = kwargs["map"] + else: + map = self.map + + a = a[:, group] + b = b[:, group] + + if map is not None: + return ( + kappa + * self.linear_kernel( + torch.t(self.map.map(a)), torch.t(self.map.map(b)) + ).detach() + ) + else: + return kappa * self.linear_kernel(a, b) + + def laplace_kernel(self, a, b, **kwargs): + if "gamma" in kwargs.keys(): + gamma = kwargs["gamma"] + else: + gamma = self.gamma + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + K = -manhattan_distances(a, b) / gamma**2 + K = np.exp(K) # exponentiate K in-place + return kappa * torch.from_numpy(K).T + + def squared_exponential_kernel(self, a, b, **kwargs): + """ + GP squared exponential kernel + """ + if "gamma" in kwargs.keys(): + gamma = kwargs["gamma"] + else: + gamma = self.gamma + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + # print 
(a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = (-0.5 / (gamma * gamma)) * sqdist + res = torch.exp(arg) + return kappa * res + + def gibbs_custom_kernel(self, a, b, **kwargs): + if "gamma_fun" in kwargs.keys(): + gamma_fun = kwargs["gamma_fun"] + else: + raise AttributeError("Missing gamma_fun in Gibbs kernel definition.") + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + # print (a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + + lengthscales = gamma_fun(a, b) + + arg = (-0.5 / lengthscales) * sqdist + res = torch.exp(arg) + return kappa * res + + def gibbs_kernel(self, a, b, **kwargs): + if "gamma_fun" in kwargs.keys(): + gamma_fun = kwargs["gamma_fun"] + else: + raise AttributeError("Missing gamma_fun in Gibbs kernel definition.") + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + # print (a.shape, b.shape) + normx = torch.sum(a**2, dim=1).view(-1, 1) + normy = torch.sum(b**2, dim=1).view(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + + lengthscales = gamma_fun(a) ** 2 + gamma_fun(b).T ** 2 + + print(lengthscales) + + arg = (-0.5 / lengthscales) * sqdist + res = torch.exp(arg) + return kappa * res + + def covar_kernel(self, a, b, **kwargs): + """ + :param a: + :param b: + :param cov: square-root of the covariance matrix + :return: + """ + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "cov" in kwargs.keys(): + cov = kwargs["cov"] + else: + cov = self.cov + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + a = torch.mm(a, cov) + b = torch.mm(b, cov) + + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) + + product = torch.mm(b, torch.t(a)) + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return kappa * res + + def covar_kernel_matern(self, a, b, **kwargs): + """ + :param a: + :param b: + :param cov: square-root of the covariance matrix + :return: + """ + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "cov" in kwargs.keys(): + cov = kwargs["cov"] + else: + cov = self.cov + if "v" in kwargs.keys(): + v = kwargs["v"] + else: + v = self.v + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + a = torch.mm(a, cov) + b = torch.mm(b, cov) + + dists = torch.cdist(a, b, p=2).T + + if v == 0.5: + K = torch.exp(-dists) + elif v == 1.5: + K = dists * np.sqrt(3) + K = (1.0 + K) * torch.exp(-K) + elif v == 2.5: + K = dists * np.sqrt(5) + K = 
(1.0 + K + K**2 / 3.0) * torch.exp(-K) + else: # general case; expensive to evaluate + K = dists + K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan + tmp = np.sqrt(2 * v) * K + K.fill((2 ** (1.0 - v)) / math.gamma(v)) + K *= tmp**v + K *= kv(v, tmp) + return kappa * K + + def ard_kernel(self, a, b, **kwargs): + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "ard_gamma" in kwargs.keys(): + gamma = kwargs["ard_gamma"] + else: + gamma = self.ard_gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + D = torch.diag(1.0 / (gamma[group])) + a = torch.mm(a, D) + b = torch.mm(b, D) + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return kappa * res + + def ard_kernel_diag(self, a, b, **kwargs): + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "ard_gamma" in kwargs.keys(): + gamma = kwargs["ard_gamma"] + else: + gamma = self.ard_gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + D = torch.diag(1.0 / (gamma[group])) + a = torch.mm(a, D) + b = torch.mm(b, D) + normx = torch.sum(a**2, dim=1).reshape(-1, 1) + normy = torch.sum(b**2, dim=1).reshape(-1, 1) + + product = torch.mm(b, torch.t(a)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + return kappa * res + + def ard_per_group_kernel_additive(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "groups" in kwargs.keys(): + groups = kwargs["groups"] + else: + groups = self.groups + + if "ard_per_group" in kwargs.keys(): + ard_per_group = kwargs["ard_per_group"] + else: + raise AssertionError( + "This kernel requires 'ard_per_group' initial parameters" + ) + + (n, z) = tuple(a.size()) + (q, m) = tuple(b.size()) + + r = torch.zeros(size=(q, n), dtype=torch.float64) + groups_index = 0 + + for group_add in groups: + kwargs["group"] = group_add + + size_group = len(group_add) + # use per group lenghtscale + # kwargs['ard_gamma'] = ard_per_group[groups_index:groups_index+size_group] + gamma = ard_per_group[groups_index : groups_index + size_group] + groups_index += size_group + + ax = a[:, group_add] + bx = b[:, group_add] + D = torch.diag(1.0 / (gamma)) + ax = torch.mm(ax, D) + bx = torch.mm(bx, D) + normx = torch.sum(ax**2, dim=1).reshape(-1, 1) + normy = torch.sum(bx**2, dim=1).reshape(-1, 1) + product = torch.mm(bx, torch.t(ax)) + # sqdist = torch.tile(normx, b.shape[0]).T + torch.tile(normy, a.shape[0]) - 2 * product + sqdist = -2 * product + torch.t(normx) + normy + arg = -0.5 * sqdist + res = torch.exp(arg) + r = r + res + + r = r / float(len(groups)) + return kappa * r + + def squared_exponential_per_group_kernel_additive(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "groups" in kwargs.keys(): + groups = kwargs["groups"] + else: + groups = self.groups + + if "gamma_per_group" in kwargs.keys(): + gamma_per_group = kwargs["gamma_per_group"] + else: + 
raise AssertionError( + "This kernel requires 'gamma_per_group' initial parameters" + ) + + (n, z) = tuple(a.size()) + (q, m) = tuple(b.size()) + + r = torch.zeros(size=(q, n), dtype=torch.float64) + + for group_add, gamma in zip(groups, gamma_per_group): + kwargs["group"] = group_add + + # use per group lenghtscale + kwargs["gamma"] = gamma + + r = r + self.squared_exponential_kernel(a, b, **kwargs) + + r = kappa * r / float(len(groups)) + return r + + def ard_kernel_additive(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "groups" in kwargs.keys(): + groups = kwargs["groups"] + else: + groups = self.groups + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + (n, z) = tuple(a.size()) + (q, m) = tuple(b.size()) + + r = torch.zeros(size=(q, n), dtype=torch.float64) + + for group_add in groups: + kwargs["group"] = group_add + r = r + self.ard_kernel(a, b, **kwargs) + + r = r / float(len(groups)) + return r + + def tanh_kernel(self, a, b, **kwargs): + """ + GP squared exponential kernel + """ + # print (a.shape, b.shape) + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + X, Y = check_pairwise_arrays(a.numpy(), b.numpy()) + K = manhattan_distances(a.numpy(), b.numpy()) + K = K.T + eps = 10e-10 + q = 3 + A = (np.tanh(K) ** q) / (eps + K**q) + return kappa * torch.from_numpy(A) + + def angsim_kernel(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + return kappa * (2.0 / np.pi) * np.arcsin((a.dot(b)) / (a.norm() * b.norm())) + + def polynomial_kernel(self, a, b, **kwargs): + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + if "degree" in kwargs.keys(): + power = kwargs["degree"] + else: + power = self.power + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + K = (torch.mm(b, torch.t(a)) + 1) ** power + return kappa * K + + def polynomial_additive_kernel(self, a, b, **kwargs): + + if "groups" in kwargs.keys(): + groups = kwargs["groups"] + else: + groups = self.groups + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + (n, z) = tuple(a.size()) + (q, m) = tuple(b.size()) + no_groups = float(len(groups)) + r = torch.zeros(size=(q, n), dtype=torch.float64) + for i, group in enumerate(groups): + z = self.polynomial_kernel(a[:, group], b[:, group], **kwargs) + r = r + z + r = r / no_groups + return r + + def matern_kernel(self, a, b, **kwargs): + """ + :param a: matrices + :param b: matrices + :param gamma: smoothness + :param v: Bessel function type + :return: + """ + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "nu" in kwargs.keys(): + v = kwargs["nu"] + else: + v = self.v + + if "gamma" in kwargs.keys(): + gamma = kwargs["gamma"] + else: + gamma = self.gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group].numpy() + b = b[:, group].numpy() + + dists = cdist(a / gamma, b / gamma, metric="euclidean").T + if v == 0.5: + K = np.exp(-dists) + elif v == 1.5: + K = dists * math.sqrt(3) + K = (1.0 + K) * np.exp(-K) + elif v == 2.5: + K = dists * 
math.sqrt(5) + K = (1.0 + K + K**2 / 3.0) * np.exp(-K) + else: # general case; expensive to evaluate + K = dists + K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan + tmp = math.sqrt(2 * v) * K + K.fill((2 ** (1.0 - v)) / math.gamma(v)) + K *= tmp**v + K *= kv(v, tmp) + return kappa * torch.from_numpy(K) + + def ard_matern_kernel_diag(self, a, b, **kwargs): + """ + :param a: matrices + :param b: matrices + :param gamma: smoothness + :param v: Bessel function type + :return: + """ + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "nu" in kwargs.keys(): + v = kwargs["nu"] + else: + v = self.v + + if "ard_gamma" in kwargs.keys(): + ard_gamma = kwargs["ard_gamma"] + else: + ard_gamma = self.ard_gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + D = torch.diag(1.0 / (ard_gamma[group])) + a = torch.mm(a, D) + b = torch.mm(b, D) + + a = a[:, group] + b = b[:, group] + + # dists = torch.cdist(a , b , p = 2).T + dists = torch.sqrt(torch.sum((a - b) ** 2)) + + if v == 0.5: + K = torch.exp(-dists) + elif v == 1.5: + K = dists * np.sqrt(3) + K = (1.0 + K) * torch.exp(-K) + elif v == 2.5: + K = dists * np.sqrt(5) + K = (1.0 + K + K**2 / 3.0) * torch.exp(-K) + else: # general case; expensive to evaluate + K = dists + K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan + tmp = np.sqrt(2 * v) * K + K.fill((2 ** (1.0 - v)) / math.gamma(v)) + K *= tmp**v + K *= kv(v, tmp) + return kappa * K + + def ard_matern_kernel(self, a, b, **kwargs): + """ + :param a: matrices + :param b: matrices + :param gamma: smoothness + :param v: Bessel function type + :return: + """ + + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "nu" in kwargs.keys(): + v = kwargs["nu"] + else: + v = self.v + + if "ard_gamma" in kwargs.keys(): + ard_gamma = kwargs["ard_gamma"] + else: + ard_gamma = self.ard_gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + D = torch.diag(1.0 / (ard_gamma[group])) + a = torch.mm(a, D) + b = torch.mm(b, D) + + a = a[:, group] + b = b[:, group] + + dists = torch.cdist(a, b, p=2).T + + if v == 0.5: + K = torch.exp(-dists) + elif v == 1.5: + K = dists * np.sqrt(3) + K = (1.0 + K) * torch.exp(-K) + elif v == 2.5: + K = dists * np.sqrt(5) + K = (1.0 + K + K**2 / 3.0) * torch.exp(-K) + else: # general case; expensive to evaluate + K = dists + K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan + tmp = np.sqrt(2 * v) * K + K.fill((2 ** (1.0 - v)) / math.gamma(v)) + K *= tmp**v + K *= kv(v, tmp) + return kappa * K + + def modified_matern_kernel(self, X, Y, **kwargs): + """ + :param a: matrices + :param b: matrices + :param gamma: smoothness + :param v: Bessel function type + :return: + """ + if "kappa" in kwargs.keys(): + kappa = kwargs["kappa"] + else: + kappa = self.kappa + + if "nu" in kwargs.keys(): + v = kwargs["nu"] + else: + v = self.v + + if "gamma" in kwargs.keys(): + gamma = kwargs["gamma"] + else: + gamma = self.gamma + + if "group" in kwargs.keys(): + group = kwargs["group"] + else: + group = self.group + + a = a[:, group] + b = b[:, group] + + d = X.size()[1] + # Z = np.ones(shape = (X.shape[0],Y.shape[0])) + Z = torch.ones(size=(Y.size()[0], X.size()[0]), dtype=torch.float64) + for i in range(d): + a = X[:, i].view(-1, 1) + b = Y[:, i].view(-1, 1) + # dists = cdist(a/gamma,b/gamma,metric='cityblock').T + dists = cdist(a.numpy() / gamma, b.numpy() / gamma, metric="euclidean").T + # dists = 
manhattan_distances(a, b).T/ gamma
+            dists = torch.from_numpy(dists)
+            if v == 1:
+                K = torch.exp(-dists)
+            elif v == 2:
+                K = (1 + dists) * torch.exp(-dists)
+            elif v == 3:
+                K = (dists**2 + 3 * torch.abs(dists) + 3) * torch.exp(-dists) / 3.0
+            elif v == 4:
+                K = (
+                    (dists**3 + 6 * dists**2 + 15 * torch.abs(dists) + 15)
+                    * torch.exp(-dists)
+                    / 15.0
+                )
+            else:
+                raise AssertionError("Kernel with nu = " + str(v) + " not implemented.")
+            Z = Z * K
+        return kappa * Z
+
+    def spectral_kernel(self, a, b):
+        if self.freq is not None:
+            (n, d) = a.size()
+            (m, d) = b.size()
+            dist = torch.zeros(size=(n, m), dtype=torch.float64)
+            c = 0
+            for x in a:
+                z = 0
+                for y in b:
+                    dist[c, z] = torch.sum(
+                        torch.cos(torch.mm(x.view(1, 1) - y.view(1, 1), self.freq))
+                    )
+                    z = z + 1
+                c = c + 1
+            N = self.freq.size()[0]
+            return torch.t(dist) / N
+        else:
+            raise AssertionError("No frequencies passed")
+
+    def wiener_kernel(self, a, b):
+        """
+        Wiener process kernel
+        k(x,y) = min(x,y)
+        k(x,y) = \sum_i min(x_i,y_i)
+        """
+        (n, d) = a.size()
+        (m, d) = b.size()
+        dist = torch.zeros(size=(n, m))
+        # dist = 0.1*np.eye(max(n,m))[0:m,0:n]
+        c = 0
+        for x in a:
+            z = 0
+            for y in b:
+                print(x, y)
+                dist[c, z] = torch.from_numpy(np.sum(np.min(np.array([x, y]), axis=0)))
+                z = z + 1
+            c = c + 1
+
+        # print (dist)
+        return dist.T
+
+    def derivative_1(self, fixed, x):
+        """ """
+        d = x.size()[1]
+        n = x.size()[0]
+
+        size = fixed.size()[0]
+
+        if self.optkernel == "squared_exponential":
+            k_original = self.squared_exponential_kernel(fixed, x)
+            second = fixed.unsqueeze(1) - x
+            second = second / self.gamma**2
+            res = self.kappa * torch.einsum("ij,jik->ijk", k_original, second)
+        else:
+            raise AssertionError("Not implemented for this kernel")
+
+        # result should be (n,d)
+        return res
+
+    def derivative_2(self, fixed, x):
+        """ """
+        d = x.size()[1]
+        n = x.size()[0]
+
+        size = fixed.size()[0]
+
+        if self.optkernel == "squared_exponential":
+            k_original = self.squared_exponential_kernel(fixed, x)
+            second = fixed.unsqueeze(1) - x
+            second = second / self.gamma**2
+            second2 = torch.einsum("ijk,ijl->ijkl", second, second)
+            res1 = torch.einsum("ij,jikl->ijkl", k_original, second2)
+
+            ones = torch.zeros(size=(size, n, d, d))
+            for j in range(d):
+                ones[:, :, j, j] = 1.0
+            ones = -ones / self.gamma**2
+            res2 = torch.einsum("ij,jikl->ijkl", k_original, ones)
+            res = self.kappa * (res1 + res2)
+            # res = self.kappa * res2
+        else:
+            raise AssertionError("Not implemented for this kernel")
+
+        return res
+
+    def square_dist(self, a, b):
+        if a.shape == b.shape:
+            normx = np.sum(a**2, axis=1).reshape(-1, 1)
+            normy = np.sum(b**2, axis=1).reshape(-1, 1)
+        else:
+            normx = np.sum(a**2, axis=1).reshape(-1, 1)
+            normy = np.sum(b**2, axis=1).reshape(-1, 1)
+
+        product = b.dot(a.T)
+        sqdist = np.tile(normx, b.shape[0]).T + np.tile(normy, a.shape[0]) - 2 * product
+        return sqdist
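The hunk above only re-formats stpy/kernels.py; the public behaviour of KernelFunction is unchanged: a kernel is selected by kernel_name, evaluated on double-precision torch tensors of shape (n, d), and composed in place with + and *. A minimal usage sketch (illustration only, not part of the patch; it assumes the stpy.kernels import path used elsewhere in this series, and the data and parameter values are made up):

    import torch
    from stpy.kernels import KernelFunction

    # toy inputs: 5 points in d=2, double precision as the class expects
    x = torch.rand(5, 2).double()

    # squared exponential kernel with lengthscale gamma and output scale kappa
    k_se = KernelFunction(kernel_name="squared_exponential", d=2, gamma=0.5, kappa=1.0)
    K = k_se.kernel(x, x)  # 5 x 5 Gram matrix

    # additive composition: __add__ mutates k_se and returns it
    k_lin = KernelFunction(kernel_name="linear", d=2)
    k_sum = k_se + k_lin
    K_sum = k_sum.kernel(x, x)
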
diff --git a/stpy/legacy/integral_kernels.py b/stpy/legacy/integral_kernels.py
index fef208c..004f32c 100755
--- a/stpy/legacy/integral_kernels.py
+++ b/stpy/legacy/integral_kernels.py
@@ -9,576 +9,614 @@ class IntegralKernel:
-	def __init__(self, dataset, s=0.1):
-
-		self.x = dataset[0]
-		self.y = dataset[1]
-
-		self.s = s
-		self.gamma = 1.0
-		self.distibution = lambda size: torch.from_numpy(np.random.normal(size=size) * (1. / self.gamma))
-
-		self.n = self.x.size()[0]
-		self.d = self.x.size()[1]
-
-		self.basis_func = lambda x, theta: torch.cat((torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1)
-		self.size = 2
-
-		self.set = []
-		self.weights = []
-		self.params = []
-		self.active_basis = None
-
-	def set_distribution(self, distibution):
-		self.distibution = distibution
-
-	def set_basis_function(self, fun, size):
-		self.basis_func = fun
-		self.size = size
-
-	def sample_basis_function(self):
-		param = self.distibution(self.d).view(-1, 1)
-		return [self.get_basis_function(param), param]
-
-	def sample_basis_function_qmc(self, size=1):
-		inv_cum_dist = lambda x: norm.ppf(x) * (1. / 1.)
-		params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d)))
-		return params
-
-	def sample_basis_vector(self):
-		fun = self.sample_basis_function()[0]
-		return fun(self.x).view(-1) / np.sqrt(self.n)
-
-	def get_basis_function(self, param):
-		return lambda x: self.basis_func(param, x)
-
-	def add_to_basis(self, fun, weight, param):
-		self.set.append(fun)
-		self.weights.append(weight)
-		self.params.append(param)
-
-	def basis_func_dataset(self, param):
-		return self.basis_func(param, self.x).view(-1) / np.sqrt(self.n)
-
-	def basis_map_set(self, x, set, weights):
-		value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64)
-		# print (value.size(),x.size(),self.set[0](x).view(-1).size())
-		for index, elem in enumerate(set):
-			# print (np.sqrt(np.array(self.weights[index]).astype(complex)))
-			value[index, :] = elem(x).view(-1) / np.sqrt(self.n) # * np.sqrt(weights[index])
-		return value
-
-	def empty(self):
-		self.active_basis = None
-		self.set = []
-		self.weights = []
-		self.params = []
-
-	def empty_add_random(self):
-		self.empty()
-		self.random_increase(1)
-
-	def basis_map(self, x):
-		return self.basis_map_set(x, self.set, self.weights)
-
-	def kernel(self, x, y, noise=True):
-		value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64)
-
-		for index, elem in enumerate(self.set):
-			value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index]
-		if noise == True:
-			value = value + self.s * self.s * torch.eye(x.size()[0], y.size()[0], dtype=torch.float64)
-
-		return value
-
-	def outer_kernel(self, x):
-		Phi = self.basis_map(x)
-		value = torch.mm(Phi, torch.t(Phi))
-		return value
-
-	def expected_phi(self, x, base=10000):
-		Ephi = torch.zeros(x.size()[0] * self.size, dtype=torch.float64)
-		for _ in range(base):
-			Ephi += self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
-		Ephi = Ephi / base
-		return Ephi
-
-	def expected_phi_squared(self, x, fun, base=10000):
-		prod = 0
-		v = fun(x).view(-1) / np.sqrt(self.n)
-		for _ in range(base):
-			sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
-			prod += torch.dot(sample, v) ** 2
-		prod = prod / base
-		return prod
-
-	def expected_phi_squared_set(self, x, base=10000):
-		v = self.active_basis
-
-		prod = torch.zeros(x.size()[0], )
-		for _ in range(base):
-			sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n)
-			prod += torch.mm(sample, v) ** 2
-		prod = prod / base
-		return prod
-
-	def update_basis(self):
-		if self.active_basis is None:
-			Phi = self.basis_map(self.x)
-			self.active_basis = Phi
-			W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64)
-			self.W_inv = torch.inverse(W)
-		else:
-			v = self.set[-1](self.x).view(1, -1) / np.sqrt(self.x.size()[0])
-			self.active_basis = torch.cat((self.active_basis, v), dim=0)
-			W = 
torch.mm(self.active_basis, torch.t(self.active_basis)) + self.s * self.s * torch.eye(len(self.set), - dtype=torch.float64) - self.W_inv = torch.inverse(W) - - """ + def __init__(self, dataset, s=0.1): + + self.x = dataset[0] + self.y = dataset[1] + + self.s = s + self.gamma = 1.0 + self.distibution = lambda size: torch.from_numpy( + np.random.normal(size=size) * (1.0 / self.gamma) + ) + + self.n = self.x.size()[0] + self.d = self.x.size()[1] + + self.basis_func = lambda x, theta: torch.cat( + (torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1 + ) + self.size = 2 + + self.set = [] + self.weights = [] + self.params = [] + self.active_basis = None + + def set_distribution(self, distibution): + self.distibution = distibution + + def set_basis_function(self, fun, size): + self.basis_func = fun + self.size = size + + def sample_basis_function(self): + param = self.distibution(self.d).view(-1, 1) + return [self.get_basis_function(param), param] + + def sample_basis_function_qmc(self, size=1): + inv_cum_dist = lambda x: norm.ppf(x) * (1.0 / 1.0) + params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d))) + return params + + def sample_basis_vector(self): + fun = self.sample_basis_function()[0] + return fun(self.x).view(-1) / np.sqrt(self.n) + + def get_basis_function(self, param): + return lambda x: self.basis_func(param, x) + + def add_to_basis(self, fun, weight, param): + self.set.append(fun) + self.weights.append(weight) + self.params.append(param) + + def basis_func_dataset(self, param): + return self.basis_func(param, self.x).view(-1) / np.sqrt(self.n) + + def basis_map_set(self, x, set, weights): + value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64) + # print (value.size(),x.size(),self.set[0](x).view(-1).size()) + for index, elem in enumerate(set): + # print (np.sqrt(np.array(self.weights[index]).astype(complex))) + value[index, :] = elem(x).view(-1) / np.sqrt( + self.n + ) # * np.sqrt(weights[index]) + return value + + def empty(self): + self.active_basis = None + self.set = [] + self.weights = [] + self.params = [] + + def empty_add_random(self): + self.empty() + self.random_increase(1) + + def basis_map(self, x): + return self.basis_map_set(x, self.set, self.weights) + + def kernel(self, x, y, noise=True): + value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64) + + for index, elem in enumerate(self.set): + value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index] + if noise == True: + value = value + self.s * self.s * torch.eye( + x.size()[0], y.size()[0], dtype=torch.float64 + ) + + return value + + def outer_kernel(self, x): + Phi = self.basis_map(x) + value = torch.mm(Phi, torch.t(Phi)) + return value + + def expected_phi(self, x, base=10000): + Ephi = torch.zeros(x.size()[0] * self.size, dtype=torch.float64) + for _ in range(base): + Ephi += self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n) + Ephi = Ephi / base + return Ephi + + def expected_phi_squared(self, x, fun, base=10000): + prod = 0 + v = fun(x).view(-1) / np.sqrt(self.n) + for _ in range(base): + sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n) + prod += torch.dot(sample, v) ** 2 + prod = prod / base + return prod + + def expected_phi_squared_set(self, x, base=10000): + v = self.active_basis + + prod = torch.zeros( + x.size()[0], + ) + for _ in range(base): + sample = self.sample_basis_function()[0](x).view(-1) / np.sqrt(self.n) + prod += torch.mm(sample, v) ** 2 + prod = prod / base + return prod + + def 
update_basis(self): + if self.active_basis is None: + Phi = self.basis_map(self.x) + self.active_basis = Phi + W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye( + len(self.set), dtype=torch.float64 + ) + self.W_inv = torch.inverse(W) + else: + v = self.set[-1](self.x).view(1, -1) / np.sqrt(self.x.size()[0]) + self.active_basis = torch.cat((self.active_basis, v), dim=0) + W = torch.mm( + self.active_basis, torch.t(self.active_basis) + ) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64) + self.W_inv = torch.inverse(W) + + """ Scores """ - def leverage_score(self, fun, adding=True, weighted=False, variance=True): - - if adding == True: - print(fun(self.x).size()) - v = fun(self.x) / np.sqrt(self.x.size()[0]) - new_active_basis = torch.cat((self.active_basis, v), dim=0) - W = torch.mm(new_active_basis, torch.t(new_active_basis)) + self.s * self.s * torch.eye(len(self.set) + 1, - dtype=torch.float64) - W_inv = torch.inverse(W) - Phi = new_active_basis - else: - W_inv = self.W_inv - Phi = self.active_basis - - if weighted == True: - S = torch.diag(torch.sqrt(torch.from_numpy(np.array(self.weights)))) - Phi = torch.mm(S, Phi) - else: - pass - # solve leverage score problem - A = torch.mm(torch.t(Phi), torch.mm(W_inv, Phi)) - rhs = fun(self.x).view(-1, 1) / np.sqrt(self.x.size()[0]) - # print (torch.mm(torch.t(rhs),rhs), torch.mm(torch.t(rhs),torch.mm(A,rhs))) - if variance == True: - leverage_score = np.abs(torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs))) / ( - self.s ** 2) - else: - leverage_score = np.abs(torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs))) - - return leverage_score - - def bayes_quad_score(self, fun, base=1000, Ephi=None): - """ - Implements score Phi(set,X)E[Phi(x)]K^{-1}E[Phi(x)]Phi(X,set) - - :param fun: new basis function - :param base: size of the basis to approximate the expected mapping - :return: - """ - if Ephi is None: - Ephi = self.expected_phi(self.x, base=base).view(-1, 1) - else: - pass - new_set = self.set.copy() - new_set.append(fun) - new_Phi = self.basis_map_set(self.x, new_set, np.ones(len(new_set)).tolist()) - W = torch.mm(new_Phi, torch.t(new_Phi)) + self.s * self.s * torch.eye(len(new_set), dtype=torch.float64) - W_inv = torch.inverse(W) - v = torch.mm(new_Phi, Ephi) - score = torch.mm(torch.t(v), torch.mm(W_inv, v)) - return score - - def greedy_score(self, candidates): - K = self.kernel(self.x, self.x, noise=False) - scores = torch.zeros(len(candidates), dtype=torch.float64) - for j in range(len(candidates)): - fun = candidates[j] - score = torch.norm(torch.mm(fun, torch.t(fun)) - K) - # print(torch.norm(torch.mm(fun,torch.t(fun))),torch.norm(K)) - scores[j] = score - return scores - - def herding_score(self, fun, base=1000, Ephi=None): - # if Ephi is None: - # Ephi = self.expected_phi(self.x, base=base).view(-1,1) - # else: - # pass - # - phi = fun(self.x).view(-1) / np.sqrt(self.n) - Phi = self.active_basis - n, m = Phi.size() - v = 0.0 - for j in range(n): - v = v + torch.dot(Phi[j, :], phi) ** 2 - v = (1. 
/ (n + 1)) * v - z = self.expected_phi_squared(self.x, fun, base=base) - r = z - v - return r - - def variance_scores(self, set=None): - if set is None: - Phi = self.basis_map_set(self.x, self.set, np.ones(len(self.set)).tolist()) - W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64) - else: - Phi = self.basis_map_set(self.x, set, np.ones(len(set)).tolist()) - W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(set), dtype=torch.float64) - W_inv = torch.inverse(W) - vars = torch.einsum('ji,ij->j', W, W_inv).view(-1, 1) - return vars - - ############################### - ## Increasing the basis size ## - ############################### - - def seq_bayes_quad_increase_heuristic(self, size=1, candidates=10, base=100): - """ - Implements sequential bayes quadrature with inexact optimization - :param size: - :param base: - :return: - """ - Ephi = self.expected_phi(self.x, base=base).view(-1, 1) - for _ in range(size): - funs = [] - scores = torch.zeros(candidates, dtype=torch.float64) - params = [] - for j in range(candidates): - fun, param = self.sample_basis_function() - leverage_score = self.bayes_quad_score(fun, Ephi=Ephi) - funs.append(fun) - scores[j] = leverage_score - params.append(param) - argmax = torch.argmax(scores) - self.add_to_basis(funs[argmax], 1.0, params[argmax]) - self.quadrature_weights() - - # def herding_exact_increase(self, size = 1): - # """ - # Solves exactly the herding problem with a non-linear solver - # :param size: size of the basis to be increase - # :return: None - # """ - # for _ in range(size): - # #fun = lambda x: self.basis_func(param,x) - # p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power( - # (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.) 
- # ls = lambda param: -self.leverage_score(self.get_basis_function(torch.from_numpy(param).view(-1,1))).numpy()[0]*p(param.reshape(-1,1))[0] - # # plot ls - # - # - # # optimize leverage score - # from scipy.optimize import minimize - # start = self.distibution(self.d).view(-1, 1).numpy() - # res = minimize(ls, start , method="L-BFGS-B", tol=0.0000001, bounds=[[-5,5]]) - # solution = torch.from_numpy(res.x).view(-1,1) - # - # #print (start, solution) - # # params = np.linspace(-10, 10, 1000).reshape(-1, 1) - # # lss = [] - # # - # # for param in params: - # # #print (param, p(param.reshape(-1,1))[0]) - # # lss.append(ls(param)*p(param.reshape(-1,1))[0]) - # # index = np.argmin(np.array(lss)) - # # solution = torch.from_numpy(params[index]).view(-1,1) - # # plt.plot(params, lss) - # # plt.plot(start,ls(start),'ro') - # # plt.plot(solution.numpy(),ls(solution.numpy()),'go') - # #plt.show() - # #print(start, solution) - # self.add_to_basis(self.get_basis_function(solution), 1., solution) - - def herding_increase_heuristic(self, size=1, candidates=100, base=1000): - """ - - :param size: - :param base: - :return: - """ - Ephi = self.expected_phi(self.x, base=base) - for _ in range(size): - # print (_) - self.update_basis() - funs = [] - scores = torch.zeros(candidates, dtype=torch.float64) - params = [] - for j in range(candidates): - fun, param = self.sample_basis_function() - leverage_score = self.herding_score(fun, Ephi=Ephi) - # print (j, leverage_score) - funs.append(fun) - scores[j] = leverage_score - params.append(param) - argmax = torch.argmax(scores) - self.add_to_basis(funs[argmax], 1., params[argmax]) - self.uniformize_weights() - - def herding_increase_heuristic_group(self, size=1, candidates=100, base=1000): - """ - - :param size: - :param base: - :return: - """ - Ephi = self.expected_phi(self.x, base=base) - for _ in range(size): - # print (_) - self.update_basis() - funs = [] - params = [] - cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float) - for j in range(candidates): - fun, param = self.sample_basis_function() - funs.append(fun) - cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n) - leverage_scores = self.herding_score_group(cand) - - argmax = torch.argmax(leverage_scores) - self.add_to_basis(funs[argmax], 1., params[argmax]) - - self.uniformize_weights() - - def dpp_increase(self, size=1, candidates=1000): - from dppy.finite_dpps import FiniteDPP - funs = [] - params = [] - cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float64) - - for j in range(candidates): - fun, param = self.sample_basis_function() - funs.append(fun) - params.append(param) - cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n) - - # Random feature vectors - Phi = torch.t(cand) - L = Phi.numpy().T.dot(Phi.numpy()) + self.s * self.s * torch.eye(candidates, candidates, - dtype=torch.float64).numpy() - DPP = FiniteDPP('likelihood', **{'L': L}) - DPP.flush_samples() - DPP.sample_exact_k_dpp(size=size) - sample_ind = DPP.list_of_samples[0] - for sample in sample_ind: - self.add_to_basis(funs[sample], 1., params[sample]) - self.uniformize_weights() - - def leverage_score_sampling(self, size=1): - count = 0 - self.update_basis() - while count < size: - - fun, param = self.sample_basis_function() - leverage_score = self.leverage_score(fun) - q_bar = size - - q = np.random.binomial(q_bar, float(leverage_score)) - # print(count, q, leverage_score) - if q > 0: - w = (q / q_bar) / leverage_score - - self.add_to_basis(fun, w, param) - self.update_basis() - # print("adding", 
w.float(), param) - count += 1 - else: - pass - # print ("reject", q) - # print ("sum", np.sum(self.weights)) - # self.uniformize_weights() - # self.quadrature_weights() - # self.leverage_weights() - self.normalize_weights() - - # optimize omp weights - - def hermite_quadrature_basis(self, size=1): - self.set = [] - self.weights = [] - self.params = [] - - (nodes, weights) = np.polynomial.hermite.hermgauss(int(size)) - nodes = torch.from_numpy(np.sqrt(2) * nodes / self.gamma) - weights = weights / np.sqrt(np.pi) - # self.weights = weights.tolist() - # print (self.weights) - for index in range(size): - fun = self.get_basis_function(nodes[index].view(self.d, -1)) - self.add_to_basis(fun, weights[index], nodes[index]) - - def greedy_increase(self, size=1, base=100): - for _ in range(size): - # print (_) - self.update_basis() - funs = [] - params = [] - cand = torch.zeros(base, self.n, self.size, dtype=torch.float64) - for j in range(base): - fun, param = self.sample_basis_function() - funs.append(fun) - params.append(param) - cand[j, :] = fun(self.x) # / np.sqrt(self.n) - - scores = self.greedy_score(cand) - argmax = torch.argmin(scores) - self.add_to_basis(funs[argmax], 1., params[argmax]) - self.normalize_weights() - - # print (self.params) - - def random_increase(self, size=1): - for _ in range(size): - f, param = self.sample_basis_function() - self.add_to_basis(f, 1., param) - self.uniformize_weights() - - def qmc_increase(self, size=1): - params = self.sample_basis_function_qmc(size=size) - n = params.size()[0] - for j in range(n): - param = params[j, :].view(1, -1) - # print (params) - self.add_to_basis(self.get_basis_function(param), 1., param) - self.uniformize_weights() - - def bach_algortihm(self, size=1, candidates=100): - for _ in range(size): - set = [] - params = [] - for j in range(candidates): - f, param = self.sample_basis_function() - set.append(f) - params.append(param) - vars = self.variance_scores(set=set) - index = np.argmax(-vars) - self.add_to_basis(set[index], 1., params[index]) - vars = self.variance_scores() - self.weights = vars.view(-1).tolist() - self.normalize_weights() - - def pca(self, kernel, size=1): - if size > self.n: - size = self.n - GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="svd") - GP.fit_gp(self.x, self.y) - return GP.outer_kernel() - - def nystrom(self, kernel, size=1): - if size > self.n: - size = self.n - GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="uniform") - GP.fit_gp(self.x, self.y) - return GP.outer_kernel() - - ########################### - ## weights optimization ## - ########################### - - def normalize_weights(self): - - # self.weights = np.ones(len(self.set))/len(self.set) - sum = np.sum(np.array(self.weights)) - self.weights = np.array(self.weights) / sum - self.weights = self.weights.tolist() - - # print (self.weights) - - def uniformize_weights(self): - self.weights = np.ones(len(self.set)) / len(self.set) - self.weights = self.weights.tolist() - - # print (self.weights) - - def bayesian_quadrature_weights(self, base=1000): - """ - Bayesian Quadrature weights - two possible kernels - :return: - """ - - phi = fun(self.x).view(-1) / np.sqrt(self.n) - Phi = self.active_basis - n, m = Phi.size() - - Z = self.expected_phi_squared_set(self.x, base=base) - - # assemble kernel - K = self.outer_kernel(self.x) * self.outer_kernel(self.x) - # invert kernel - self.weights = torch.mm(torch.mm(Z, torch.pinverse(K)), Z) - self.weights = self.weights.tolist() - - def 
leverage_weights(self): - - Phi = self.basis_map(self.x) - self.active_basis = Phi - W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64) - self.W_inv = torch.inverse(W) - - new_weights = [] - n = len(self.set) - for fun in self.set: - leverage_score = self.leverage_score(fun, adding=False, variance=True, weighted=False) - # print (leverage_score) - new_weights.append(leverage_score) - self.weights = new_weights - self.normalize_weights() - - def leverage_weights_experimental(self, Kinv): - - Phi = self.basis_map(self.x) - self.active_basis = Phi - W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye(len(self.set), dtype=torch.float64) - W_outer = torch.mm(torch.t(Phi), Phi) + self.s * self.s * torch.eye(self.n * 2, dtype=torch.float64) - W_outer_inv = torch.inverse(W_outer) - self.W_inv = torch.inverse(W) - - print(torch.norm(W_outer - Kinv)) - - # print (Kinv) - new_weights = [] - n = len(self.set) - for fun in self.set: - # leverage_score = self.leverage_score(fun, adding = False, variance = False, weighted= True) - v = fun(self.x).view(-1, 1) / np.sqrt(self.n) - # print (torch.trace(torch.mm(torch.t(v),v))) - mat = torch.mm(torch.t(v), torch.mm(W_outer_inv, v)) - # print (mat) - leverage_score = torch.trace(mat) - if leverage_score > 0.0: - # print ("Violation!") - lv = self.leverage_score(fun, adding=False, variance=True, weighted=False) - print(float(leverage_score), float(lv)) - # new_weights.append(float(2./(n*leverage_score))) - new_weights.append(1. / (n * leverage_score)) - self.weights = new_weights - self.normalize_weights() - - # print (self.weights) - # print (self.params) - # print(self.weights) - def omp_optimize(self, size=1): - pass + def leverage_score(self, fun, adding=True, weighted=False, variance=True): + + if adding == True: + print(fun(self.x).size()) + v = fun(self.x) / np.sqrt(self.x.size()[0]) + new_active_basis = torch.cat((self.active_basis, v), dim=0) + W = torch.mm( + new_active_basis, torch.t(new_active_basis) + ) + self.s * self.s * torch.eye(len(self.set) + 1, dtype=torch.float64) + W_inv = torch.inverse(W) + Phi = new_active_basis + else: + W_inv = self.W_inv + Phi = self.active_basis + + if weighted == True: + S = torch.diag(torch.sqrt(torch.from_numpy(np.array(self.weights)))) + Phi = torch.mm(S, Phi) + else: + pass + # solve leverage score problem + A = torch.mm(torch.t(Phi), torch.mm(W_inv, Phi)) + rhs = fun(self.x).view(-1, 1) / np.sqrt(self.x.size()[0]) + # print (torch.mm(torch.t(rhs),rhs), torch.mm(torch.t(rhs),torch.mm(A,rhs))) + if variance == True: + leverage_score = np.abs( + torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs)) + ) / (self.s**2) + else: + leverage_score = np.abs( + torch.mm(torch.t(rhs), rhs) - torch.mm(torch.t(rhs), torch.mm(A, rhs)) + ) + + return leverage_score + + def bayes_quad_score(self, fun, base=1000, Ephi=None): + """ + Implements score Phi(set,X)E[Phi(x)]K^{-1}E[Phi(x)]Phi(X,set) + + :param fun: new basis function + :param base: size of the basis to approximate the expected mapping + :return: + """ + if Ephi is None: + Ephi = self.expected_phi(self.x, base=base).view(-1, 1) + else: + pass + new_set = self.set.copy() + new_set.append(fun) + new_Phi = self.basis_map_set(self.x, new_set, np.ones(len(new_set)).tolist()) + W = torch.mm(new_Phi, torch.t(new_Phi)) + self.s * self.s * torch.eye( + len(new_set), dtype=torch.float64 + ) + W_inv = torch.inverse(W) + v = torch.mm(new_Phi, Ephi) + score = torch.mm(torch.t(v), torch.mm(W_inv, v)) + return 
score + + def greedy_score(self, candidates): + K = self.kernel(self.x, self.x, noise=False) + scores = torch.zeros(len(candidates), dtype=torch.float64) + for j in range(len(candidates)): + fun = candidates[j] + score = torch.norm(torch.mm(fun, torch.t(fun)) - K) + # print(torch.norm(torch.mm(fun,torch.t(fun))),torch.norm(K)) + scores[j] = score + return scores + + def herding_score(self, fun, base=1000, Ephi=None): + # if Ephi is None: + # Ephi = self.expected_phi(self.x, base=base).view(-1,1) + # else: + # pass + # + phi = fun(self.x).view(-1) / np.sqrt(self.n) + Phi = self.active_basis + n, m = Phi.size() + v = 0.0 + for j in range(n): + v = v + torch.dot(Phi[j, :], phi) ** 2 + v = (1.0 / (n + 1)) * v + z = self.expected_phi_squared(self.x, fun, base=base) + r = z - v + return r + + def variance_scores(self, set=None): + if set is None: + Phi = self.basis_map_set(self.x, self.set, np.ones(len(self.set)).tolist()) + W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye( + len(self.set), dtype=torch.float64 + ) + else: + Phi = self.basis_map_set(self.x, set, np.ones(len(set)).tolist()) + W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye( + len(set), dtype=torch.float64 + ) + W_inv = torch.inverse(W) + vars = torch.einsum("ji,ij->j", W, W_inv).view(-1, 1) + return vars + + ############################### + ## Increasing the basis size ## + ############################### + + def seq_bayes_quad_increase_heuristic(self, size=1, candidates=10, base=100): + """ + Implements sequential bayes quadrature with inexact optimization + :param size: + :param base: + :return: + """ + Ephi = self.expected_phi(self.x, base=base).view(-1, 1) + for _ in range(size): + funs = [] + scores = torch.zeros(candidates, dtype=torch.float64) + params = [] + for j in range(candidates): + fun, param = self.sample_basis_function() + leverage_score = self.bayes_quad_score(fun, Ephi=Ephi) + funs.append(fun) + scores[j] = leverage_score + params.append(param) + argmax = torch.argmax(scores) + self.add_to_basis(funs[argmax], 1.0, params[argmax]) + self.quadrature_weights() + + # def herding_exact_increase(self, size = 1): + # """ + # Solves exactly the herding problem with a non-linear solver + # :param size: size of the basis to be increase + # :return: None + # """ + # for _ in range(size): + # #fun = lambda x: self.basis_func(param,x) + # p = lambda omega: np.exp(-np.sum(omega ** 2, axis=1).reshape(-1, 1) / 2 * (self.gamma ** 2)) * np.power( + # (self.gamma / np.sqrt(2 * np.pi)), 1.) * np.power(np.pi / 2, 1.) 
+ # ls = lambda param: -self.leverage_score(self.get_basis_function(torch.from_numpy(param).view(-1,1))).numpy()[0]*p(param.reshape(-1,1))[0] + # # plot ls + # + # + # # optimize leverage score + # from scipy.optimize import minimize + # start = self.distibution(self.d).view(-1, 1).numpy() + # res = minimize(ls, start , method="L-BFGS-B", tol=0.0000001, bounds=[[-5,5]]) + # solution = torch.from_numpy(res.x).view(-1,1) + # + # #print (start, solution) + # # params = np.linspace(-10, 10, 1000).reshape(-1, 1) + # # lss = [] + # # + # # for param in params: + # # #print (param, p(param.reshape(-1,1))[0]) + # # lss.append(ls(param)*p(param.reshape(-1,1))[0]) + # # index = np.argmin(np.array(lss)) + # # solution = torch.from_numpy(params[index]).view(-1,1) + # # plt.plot(params, lss) + # # plt.plot(start,ls(start),'ro') + # # plt.plot(solution.numpy(),ls(solution.numpy()),'go') + # #plt.show() + # #print(start, solution) + # self.add_to_basis(self.get_basis_function(solution), 1., solution) + + def herding_increase_heuristic(self, size=1, candidates=100, base=1000): + """ + + :param size: + :param base: + :return: + """ + Ephi = self.expected_phi(self.x, base=base) + for _ in range(size): + # print (_) + self.update_basis() + funs = [] + scores = torch.zeros(candidates, dtype=torch.float64) + params = [] + for j in range(candidates): + fun, param = self.sample_basis_function() + leverage_score = self.herding_score(fun, Ephi=Ephi) + # print (j, leverage_score) + funs.append(fun) + scores[j] = leverage_score + params.append(param) + argmax = torch.argmax(scores) + self.add_to_basis(funs[argmax], 1.0, params[argmax]) + self.uniformize_weights() + + def herding_increase_heuristic_group(self, size=1, candidates=100, base=1000): + """ + + :param size: + :param base: + :return: + """ + Ephi = self.expected_phi(self.x, base=base) + for _ in range(size): + # print (_) + self.update_basis() + funs = [] + params = [] + cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float) + for j in range(candidates): + fun, param = self.sample_basis_function() + funs.append(fun) + cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n) + leverage_scores = self.herding_score_group(cand) + + argmax = torch.argmax(leverage_scores) + self.add_to_basis(funs[argmax], 1.0, params[argmax]) + + self.uniformize_weights() + + def dpp_increase(self, size=1, candidates=1000): + from dppy.finite_dpps import FiniteDPP + + funs = [] + params = [] + cand = torch.zeros(candidates, self.n * self.size, dtype=torch.float64) + + for j in range(candidates): + fun, param = self.sample_basis_function() + funs.append(fun) + params.append(param) + cand[j, :] = fun(self.x).view(-1) / np.sqrt(self.n) + + # Random feature vectors + Phi = torch.t(cand) + L = ( + Phi.numpy().T.dot(Phi.numpy()) + + self.s + * self.s + * torch.eye(candidates, candidates, dtype=torch.float64).numpy() + ) + DPP = FiniteDPP("likelihood", **{"L": L}) + DPP.flush_samples() + DPP.sample_exact_k_dpp(size=size) + sample_ind = DPP.list_of_samples[0] + for sample in sample_ind: + self.add_to_basis(funs[sample], 1.0, params[sample]) + self.uniformize_weights() + + def leverage_score_sampling(self, size=1): + count = 0 + self.update_basis() + while count < size: + + fun, param = self.sample_basis_function() + leverage_score = self.leverage_score(fun) + q_bar = size + + q = np.random.binomial(q_bar, float(leverage_score)) + # print(count, q, leverage_score) + if q > 0: + w = (q / q_bar) / leverage_score + + self.add_to_basis(fun, w, param) + self.update_basis() + # 
print("adding", w.float(), param) + count += 1 + else: + pass + # print ("reject", q) + # print ("sum", np.sum(self.weights)) + # self.uniformize_weights() + # self.quadrature_weights() + # self.leverage_weights() + self.normalize_weights() + + # optimize omp weights + + def hermite_quadrature_basis(self, size=1): + self.set = [] + self.weights = [] + self.params = [] + + (nodes, weights) = np.polynomial.hermite.hermgauss(int(size)) + nodes = torch.from_numpy(np.sqrt(2) * nodes / self.gamma) + weights = weights / np.sqrt(np.pi) + # self.weights = weights.tolist() + # print (self.weights) + for index in range(size): + fun = self.get_basis_function(nodes[index].view(self.d, -1)) + self.add_to_basis(fun, weights[index], nodes[index]) + + def greedy_increase(self, size=1, base=100): + for _ in range(size): + # print (_) + self.update_basis() + funs = [] + params = [] + cand = torch.zeros(base, self.n, self.size, dtype=torch.float64) + for j in range(base): + fun, param = self.sample_basis_function() + funs.append(fun) + params.append(param) + cand[j, :] = fun(self.x) # / np.sqrt(self.n) + + scores = self.greedy_score(cand) + argmax = torch.argmin(scores) + self.add_to_basis(funs[argmax], 1.0, params[argmax]) + self.normalize_weights() + + # print (self.params) + + def random_increase(self, size=1): + for _ in range(size): + f, param = self.sample_basis_function() + self.add_to_basis(f, 1.0, param) + self.uniformize_weights() + + def qmc_increase(self, size=1): + params = self.sample_basis_function_qmc(size=size) + n = params.size()[0] + for j in range(n): + param = params[j, :].view(1, -1) + # print (params) + self.add_to_basis(self.get_basis_function(param), 1.0, param) + self.uniformize_weights() + + def bach_algortihm(self, size=1, candidates=100): + for _ in range(size): + set = [] + params = [] + for j in range(candidates): + f, param = self.sample_basis_function() + set.append(f) + params.append(param) + vars = self.variance_scores(set=set) + index = np.argmax(-vars) + self.add_to_basis(set[index], 1.0, params[index]) + vars = self.variance_scores() + self.weights = vars.view(-1).tolist() + self.normalize_weights() + + def pca(self, kernel, size=1): + if size > self.n: + size = self.n + GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="svd") + GP.fit_gp(self.x, self.y) + return GP.outer_kernel() + + def nystrom(self, kernel, size=1): + if size > self.n: + size = self.n + GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="uniform") + GP.fit_gp(self.x, self.y) + return GP.outer_kernel() + + ########################### + ## weights optimization ## + ########################### + + def normalize_weights(self): + + # self.weights = np.ones(len(self.set))/len(self.set) + sum = np.sum(np.array(self.weights)) + self.weights = np.array(self.weights) / sum + self.weights = self.weights.tolist() + + # print (self.weights) + + def uniformize_weights(self): + self.weights = np.ones(len(self.set)) / len(self.set) + self.weights = self.weights.tolist() + + # print (self.weights) + + def bayesian_quadrature_weights(self, base=1000): + """ + Bayesian Quadrature weights + two possible kernels + :return: + """ + + phi = fun(self.x).view(-1) / np.sqrt(self.n) + Phi = self.active_basis + n, m = Phi.size() + + Z = self.expected_phi_squared_set(self.x, base=base) + + # assemble kernel + K = self.outer_kernel(self.x) * self.outer_kernel(self.x) + # invert kernel + self.weights = torch.mm(torch.mm(Z, torch.pinverse(K)), Z) + self.weights = self.weights.tolist() + + def 
leverage_weights(self): + + Phi = self.basis_map(self.x) + self.active_basis = Phi + W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye( + len(self.set), dtype=torch.float64 + ) + self.W_inv = torch.inverse(W) + + new_weights = [] + n = len(self.set) + for fun in self.set: + leverage_score = self.leverage_score( + fun, adding=False, variance=True, weighted=False + ) + # print (leverage_score) + new_weights.append(leverage_score) + self.weights = new_weights + self.normalize_weights() + + def leverage_weights_experimental(self, Kinv): + + Phi = self.basis_map(self.x) + self.active_basis = Phi + W = torch.mm(Phi, torch.t(Phi)) + self.s * self.s * torch.eye( + len(self.set), dtype=torch.float64 + ) + W_outer = torch.mm(torch.t(Phi), Phi) + self.s * self.s * torch.eye( + self.n * 2, dtype=torch.float64 + ) + W_outer_inv = torch.inverse(W_outer) + self.W_inv = torch.inverse(W) + + print(torch.norm(W_outer - Kinv)) + + # print (Kinv) + new_weights = [] + n = len(self.set) + for fun in self.set: + # leverage_score = self.leverage_score(fun, adding = False, variance = False, weighted= True) + v = fun(self.x).view(-1, 1) / np.sqrt(self.n) + # print (torch.trace(torch.mm(torch.t(v),v))) + mat = torch.mm(torch.t(v), torch.mm(W_outer_inv, v)) + # print (mat) + leverage_score = torch.trace(mat) + if leverage_score > 0.0: + # print ("Violation!") + lv = self.leverage_score( + fun, adding=False, variance=True, weighted=False + ) + print(float(leverage_score), float(lv)) + # new_weights.append(float(2./(n*leverage_score))) + new_weights.append(1.0 / (n * leverage_score)) + self.weights = new_weights + self.normalize_weights() + + # print (self.weights) + # print (self.params) + # print(self.weights) + def omp_optimize(self, size=1): + pass if __name__ == "__main__": - d = 1 - n = 1024 - N = 100 - L_infinity_ball = 1 - s = 0.001 - xtest = torch.from_numpy(interval(n, d)) - # x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) - x = torch.from_numpy(np.linspace(-1, 1, N)).view(N, d) - f = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) - y = f(x) - - IK = IntegralKernel([x, y], s=s) - IK.random_increase(1000) - IK.uniformize_weights() - IK.quadrature_weights() - - fun = IK.sample_basis_function()[0] - print(IK.bayes_quad_score(fun)) + d = 1 + n = 1024 + N = 100 + L_infinity_ball = 1 + s = 0.001 + xtest = torch.from_numpy(interval(n, d)) + # x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) + x = torch.from_numpy(np.linspace(-1, 1, N)).view(N, d) + f = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) + y = f(x) + + IK = IntegralKernel([x, y], s=s) + IK.random_increase(1000) + IK.uniformize_weights() + IK.quadrature_weights() + + fun = IK.sample_basis_function()[0] + print(IK.bayes_quad_score(fun)) diff --git a/stpy/legacy/integral_kernels2.py b/stpy/legacy/integral_kernels2.py index d442d8d..b5699b5 100755 --- a/stpy/legacy/integral_kernels2.py +++ b/stpy/legacy/integral_kernels2.py @@ -7,133 +7,143 @@ class IntegralKernel: - def __init__(self, dataset, s=0.1): + def __init__(self, dataset, s=0.1): - self.x = dataset[0] - self.y = dataset[1] + self.x = dataset[0] + self.y = dataset[1] - self.s = s - self.gamma = 1.0 - self.distibution = lambda size: torch.from_numpy(np.random.normal(size=size) * (1. 
/ self.gamma)) + self.s = s + self.gamma = 1.0 + self.distibution = lambda size: torch.from_numpy( + np.random.normal(size=size) * (1.0 / self.gamma) + ) - self.n = self.x.size()[0] - self.d = self.x.size()[1] + self.n = self.x.size()[0] + self.d = self.x.size()[1] - self.basis_func = lambda x, theta: torch.cat((torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1) - self.size = 2 + self.basis_func = lambda x, theta: torch.cat( + (torch.cos(torch.mm(theta, x)), torch.sin(torch.mm(theta, x))), 1 + ) + self.size = 2 - self.set = [] - self.weights = [] - self.params = [] - self.active_basis = None + self.set = [] + self.weights = [] + self.params = [] + self.active_basis = None - def set_distribution(self, distibution): - self.distibution = distibution + def set_distribution(self, distibution): + self.distibution = distibution - def set_basis_function(self, fun, size): - self.basis_func = fun - self.size = size + def set_basis_function(self, fun, size): + self.basis_func = fun + self.size = size - def sample_basis_function(self): - param = self.distibution(self.d).view(-1, 1) - return [self.get_basis_function(param), param] + def sample_basis_function(self): + param = self.distibution(self.d).view(-1, 1) + return [self.get_basis_function(param), param] - def sample_basis_function_qmc(self, size=1): - inv_cum_dist = lambda x: norm.ppf(x) * (1. / 1.) - params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d))) - return params + def sample_basis_function_qmc(self, size=1): + inv_cum_dist = lambda x: norm.ppf(x) * (1.0 / 1.0) + params = torch.from_numpy(sample_qmc_halton(inv_cum_dist, size=(size, self.d))) + return params - def sample_basis_vector(self): - fun = self.sample_basis_function()[0] - return fun(self.x).view(-1) / np.sqrt(self.n) + def sample_basis_vector(self): + fun = self.sample_basis_function()[0] + return fun(self.x).view(-1) / np.sqrt(self.n) - def get_basis_function(self, param): - return lambda x: self.basis_func(param, x) + def get_basis_function(self, param): + return lambda x: self.basis_func(param, x) - def add_to_basis(self, fun, weight, param): - self.set.append(fun) - self.weights.append(weight) - self.params.append(param) + def add_to_basis(self, fun, weight, param): + self.set.append(fun) + self.weights.append(weight) + self.params.append(param) - def empty(self): - self.active_basis = None - self.set = [] - self.weights = [] - self.params = [] + def empty(self): + self.active_basis = None + self.set = [] + self.weights = [] + self.params = [] - def empty_add_random(self): - self.empty() - self.random_increase(1) + def empty_add_random(self): + self.empty() + self.random_increase(1) - def kernel(self, x, y, noise=True): - value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64) + def kernel(self, x, y, noise=True): + value = torch.zeros(x.size()[0], y.size()[0], dtype=torch.float64) - for index, elem in enumerate(self.set): - value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index] + for index, elem in enumerate(self.set): + value += torch.mm(elem(x), torch.t(elem(y))) * self.weights[index] - if noise == True: - value = value + self.s * self.s * torch.eye(x.size()[0], y.size()[0], dtype=torch.float64) + if noise == True: + value = value + self.s * self.s * torch.eye( + x.size()[0], y.size()[0], dtype=torch.float64 + ) - return value + return value - def random_basis(self, size=1): - for _ in range(size): - f, param = self.sample_basis_function() - self.add_to_basis(f, 1., param) - self.uniformize_weights() + def 
random_basis(self, size=1): + for _ in range(size): + f, param = self.sample_basis_function() + self.add_to_basis(f, 1.0, param) + self.uniformize_weights() - def leverage_socre(self, fun): - v = fun(self.x) / np.sqrt(self.x.size()[0]) - new_set = self.set + def leverage_socre(self, fun): + v = fun(self.x) / np.sqrt(self.x.size()[0]) + new_set = self.set - def basis_map_set(self, x, set): - value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64) - for index, elem in enumerate(set): - value[index, :] = elem(x).view(-1) / np.sqrt(self.n) # * np.sqrt(weights[index]) - return value + def basis_map_set(self, x, set): + value = torch.zeros(len(set), x.size()[0] * self.size, dtype=torch.float64) + for index, elem in enumerate(set): + value[index, :] = elem(x).view(-1) / np.sqrt( + self.n + ) # * np.sqrt(weights[index]) + return value - def outer_kernel(self, x): - Phi = self.basis_map_set(x, self.set) - value = torch.mm(Phi, torch.t(Phi)) - return value + def outer_kernel(self, x): + Phi = self.basis_map_set(x, self.set) + value = torch.mm(Phi, torch.t(Phi)) + return value - def leverage_score(self, fun): + def leverage_score(self, fun): - return 1.0 + return 1.0 - def leverage_score_basis(self, size=1): - count = 0 + def leverage_score_basis(self, size=1): + count = 0 - while count < size: - fun, param = self.sample_basis_function() - leverage_score = self.leverage_score(fun) - q_bar = size + while count < size: + fun, param = self.sample_basis_function() + leverage_score = self.leverage_score(fun) + q_bar = size - q = np.random.binomial(q_bar, float(leverage_score)) - if q > 0: - w = (q / q_bar) / leverage_score + q = np.random.binomial(q_bar, float(leverage_score)) + if q > 0: + w = (q / q_bar) / leverage_score - self.add_to_basis(fun, w, param) - count += 1 - else: - pass + self.add_to_basis(fun, w, param) + count += 1 + else: + pass - self.normalize_weights() + self.normalize_weights() - def normalize_weights(self): + def normalize_weights(self): - # self.weights = np.ones(len(self.set))/len(self.set) - sum = np.sum(np.array(self.weights)) - self.weights = np.array(self.weights) / sum - self.weights = self.weights.tolist() + # self.weights = np.ones(len(self.set))/len(self.set) + sum = np.sum(np.array(self.weights)) + self.weights = np.array(self.weights) / sum + self.weights = self.weights.tolist() + + # print (self.weights) + + def uniformize_weights(self): + self.weights = np.ones(len(self.set)) / len(self.set) + self.weights = self.weights.tolist() - # print (self.weights) - def uniformize_weights(self): - self.weights = np.ones(len(self.set)) / len(self.set) - self.weights = self.weights.tolist() # print (self.weights) if __name__ == "__main__": - pass + pass diff --git a/stpy/optim/cost_functions.py b/stpy/optim/cost_functions.py index 9248763..a31ba01 100755 --- a/stpy/optim/cost_functions.py +++ b/stpy/optim/cost_functions.py @@ -3,51 +3,62 @@ class CostFunction: - def __init__(self, cost, number_args=1): - self.cost = cost - self.number_args = number_args - - def joined_egrad(self, Xx): - for X in Xx: - X.requires_grad_(True) - y = self.cost(Xx) - y.backward(retain_graph=True) - output = [] - for X in Xx: - output.append(X.grad) - return output - - def joined_hess(self, Xx, Uu): - for X in zip(Xx): - X.requires_grad_(True) - y = self.joined_egrad(Xx) - y.backward(retain_graph=True) - output = [] - for X, U in zip(Xx, Uu): - output.append(torch.mm(X.grad, Uu)) - return output - - def egrad(self, X): - X.requires_grad_(True) - y = self.cost(X) - 
y.backward(retain_graph=True) - return X.grad - - def ehess(self, X, U): - X.requires_grad_(True) - y = self.egrad(X) - y.backward(retain_graph=True) - return torch.mm(X.grad, U) - - def define(self): - if self.number_args == 1: - cost_numpy = lambda X: self.cost(torch.from_numpy(X)).data.numpy() - grad_numpy = lambda X: self.egrad(torch.from_numpy(X)).data.numpy() - hess_numpy = lambda X, U: self.ehess(torch.from_numpy(X), torch.from_numpy(U)).data.numpy() - return [cost_numpy, grad_numpy, hess_numpy] - else: - cost_numpy = lambda Xx: self.cost([torch.from_numpy(X) for X in Xx]).data.numpy() - grad_numpy = lambda Xx: [z.data.numpy() for z in self.joined_egrad([torch.from_numpy(X) for X in Xx])] - hess_numpy = lambda Xx, Uu: [z.data.numpy() for z in self.joined_ehess([torch.from_numpy(X) for X in Xx], - [torch.from_numpy(U) for U in Uu])] - return [cost_numpy, grad_numpy, hess_numpy] + def __init__(self, cost, number_args=1): + self.cost = cost + self.number_args = number_args + + def joined_egrad(self, Xx): + for X in Xx: + X.requires_grad_(True) + y = self.cost(Xx) + y.backward(retain_graph=True) + output = [] + for X in Xx: + output.append(X.grad) + return output + + def joined_hess(self, Xx, Uu): + for X in zip(Xx): + X.requires_grad_(True) + y = self.joined_egrad(Xx) + y.backward(retain_graph=True) + output = [] + for X, U in zip(Xx, Uu): + output.append(torch.mm(X.grad, Uu)) + return output + + def egrad(self, X): + X.requires_grad_(True) + y = self.cost(X) + y.backward(retain_graph=True) + return X.grad + + def ehess(self, X, U): + X.requires_grad_(True) + y = self.egrad(X) + y.backward(retain_graph=True) + return torch.mm(X.grad, U) + + def define(self): + if self.number_args == 1: + cost_numpy = lambda X: self.cost(torch.from_numpy(X)).data.numpy() + grad_numpy = lambda X: self.egrad(torch.from_numpy(X)).data.numpy() + hess_numpy = lambda X, U: self.ehess( + torch.from_numpy(X), torch.from_numpy(U) + ).data.numpy() + return [cost_numpy, grad_numpy, hess_numpy] + else: + cost_numpy = lambda Xx: self.cost( + [torch.from_numpy(X) for X in Xx] + ).data.numpy() + grad_numpy = lambda Xx: [ + z.data.numpy() + for z in self.joined_egrad([torch.from_numpy(X) for X in Xx]) + ] + hess_numpy = lambda Xx, Uu: [ + z.data.numpy() + for z in self.joined_ehess( + [torch.from_numpy(X) for X in Xx], [torch.from_numpy(U) for U in Uu] + ) + ] + return [cost_numpy, grad_numpy, hess_numpy] diff --git a/stpy/optim/custom_optimizers.py b/stpy/optim/custom_optimizers.py index 568802e..b3f5bc7 100644 --- a/stpy/optim/custom_optimizers.py +++ b/stpy/optim/custom_optimizers.py @@ -4,327 +4,340 @@ import torch -def bisection(g, a, b, N, version='stop'): - '''Approximate solution of g(x)=0 on interval [a,b] by bisection method. - - Parameters - ---------- - g : function - The function for which we are trying to approximate a solution g(x)=0. - a,b : numbers - The interval in which to search for a solution. The function returns - None if g(a)*g(b) >= 0 since a solution is not guaranteed. - N : (positive) integer - The number of iterations to implement. - - Returns - ------- - x_N : number - The midpoint of the Nth interval computed by the bisection method. The - initial interval [a_0,b_0] is given by [a,b]. If f(m_n) == 0 for some - midpoint m_n = (a_n + b_n)/2, then the function returns this solution. - If all signs of values f(a_n), f(b_n) and f(m_n) are the same at any - iteration, the bisection method fails and return None. 
- - Examples - -------- - >>> f = lambda x: x**2 - x - 1 - >>> bisection(f,1,2,25) - 1.618033990263939 - >>> f = lambda x: (2*x - 1)*(x - 3) - >>> bisection(f,0,1,10) - 0.5 - ''' - d = {} - - def f(x): - if x in d: - return d[x] - else: - d[x] = g(x) - return d[x] - - if version == 'stop': - if f(a) < 0.: - return a - if f(a) * f(b) > 0.: - print("Bisection method fails.") - return None - - a_n = a - b_n = b - dict = {} - for n in range(1, N + 1): - m_n = (a_n + b_n) / 2. - f_m_n = f(m_n) - if f(a_n) * f_m_n < 0: - a_n = a_n - b_n = m_n - elif f(b_n) * f_m_n < 0: - a_n = m_n - b_n = b_n - elif f_m_n == 0: - print("Found exact solution.") - return m_n - else: - return a_n - print("Bisection method fails.") - return None - return (a_n + b_n) / 2. +def bisection(g, a, b, N, version="stop"): + """Approximate solution of g(x)=0 on interval [a,b] by bisection method. + + Parameters + ---------- + g : function + The function for which we are trying to approximate a solution g(x)=0. + a,b : numbers + The interval in which to search for a solution. The function returns + None if g(a)*g(b) >= 0 since a solution is not guaranteed. + N : (positive) integer + The number of iterations to implement. + + Returns + ------- + x_N : number + The midpoint of the Nth interval computed by the bisection method. The + initial interval [a_0,b_0] is given by [a,b]. If f(m_n) == 0 for some + midpoint m_n = (a_n + b_n)/2, then the function returns this solution. + If all signs of values f(a_n), f(b_n) and f(m_n) are the same at any + iteration, the bisection method fails and return None. + + Examples + -------- + >>> f = lambda x: x**2 - x - 1 + >>> bisection(f,1,2,25) + 1.618033990263939 + >>> f = lambda x: (2*x - 1)*(x - 3) + >>> bisection(f,0,1,10) + 0.5 + """ + d = {} + + def f(x): + if x in d: + return d[x] + else: + d[x] = g(x) + return d[x] + + if version == "stop": + if f(a) < 0.0: + return a + if f(a) * f(b) > 0.0: + print("Bisection method fails.") + return None + + a_n = a + b_n = b + dict = {} + for n in range(1, N + 1): + m_n = (a_n + b_n) / 2.0 + f_m_n = f(m_n) + if f(a_n) * f_m_n < 0: + a_n = a_n + b_n = m_n + elif f(b_n) * f_m_n < 0: + a_n = m_n + b_n = b_n + elif f_m_n == 0: + print("Found exact solution.") + return m_n + else: + return a_n + print("Bisection method fails.") + return None + return (a_n + b_n) / 2.0 def greedy_per_step(fun, add, ground_set, min=True): - scores = [] - for elem in range(ground_set.size()[0]): - new = add(ground_set[elem, :].view(1, -1)) - scores.append(fun(new)) - if min: - j = np.argmin(scores) - else: - j = np.argmax(scores) - return [j] + scores = [] + for elem in range(ground_set.size()[0]): + new = add(ground_set[elem, :].view(1, -1)) + scores.append(fun(new)) + if min: + j = np.argmin(scores) + else: + j = np.argmax(scores) + return [j] def QPQC_problem(A, a, s, Sigma=None): - n = A.shape[0] - if Sigma is None: - I = np.eye(n) - Sigma = I + n = A.shape[0] + if Sigma is None: + I = np.eye(n) + Sigma = I - # SDP relaxation - M = np.zeros(shape=(n + 1, n + 1)) + # SDP relaxation + M = np.zeros(shape=(n + 1, n + 1)) - M[0, 1:] = -a.reshape(-1) - M[1:, 0] = -a.T.reshape(-1) - M[1:, 1:] = A + M[0, 1:] = -a.reshape(-1) + M[1:, 0] = -a.T.reshape(-1) + M[1:, 1:] = A - # print (M) + # print (M) - Meqconst = np.eye(n + 1) - Meqconst[1:, 1:] = Sigma - Meqconst[0][0] = 0 + Meqconst = np.eye(n + 1) + Meqconst[1:, 1:] = Sigma + Meqconst[0][0] = 0 - # print (Meqconst) + # print (Meqconst) - First = np.zeros(shape=(n + 1, n + 1)) - First[0, 0] = 1. 
+ First = np.zeros(shape=(n + 1, n + 1)) + First[0, 0] = 1.0 - X = cp.Variable((n + 1, n + 1)) + X = cp.Variable((n + 1, n + 1)) - objective = cp.Maximize(cp.trace(M @ X)) + objective = cp.Maximize(cp.trace(M @ X)) - constraints = [X >> 0] - constraints += [cp.trace(Meqconst @ X) >= s] - constraints += [cp.trace(First @ X) == 1] + constraints = [X >> 0] + constraints += [cp.trace(Meqconst @ X) >= s] + constraints += [cp.trace(First @ X) == 1] - prob = cp.Problem(objective, constraints) - prob.solve() + prob = cp.Problem(objective, constraints) + prob.solve() - # print (X.value[1:,1:]) - eigvals, eigvecs = np.linalg.eig(X.value[1:, 1:]) + # print (X.value[1:,1:]) + eigvals, eigvecs = np.linalg.eig(X.value[1:, 1:]) - index = np.argmax(eigvals) - val = np.max(eigvals) - x = np.real(eigvecs[index] * np.sqrt(val)) - return val, x + index = np.argmax(eigvals) + val = np.max(eigvals) + x = np.real(eigvecs[index] * np.sqrt(val)) + return val, x def convex_QCQP(A, a, s, Sigma=None, threads=4, verbose=False): - """ - Solving - - min xAx - 2ax - s.t. xSigmax \leq s - A, Sigma psd. - - :param A: - :param a: - :param s: - :param Sigma: - :return: - """ - n = A.shape[0] - - if Sigma is None: - I = np.eye(n) - Sigma = I - - x = cp.Variable(n) - objective = cp.Minimize(cp.quad_form(x, A) - 2 * x @ a) - zero = np.zeros(n) - # constraints = [ cp.SOC(zero@x + np.array([np.sqrt(s)]), Sigma @ x)] - constraints = [cp.quad_form(x, Sigma) <= s] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-8, - mosek.dparam.intpnt_co_tol_dfeas: 1e-8, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-8}, - verbose=verbose) - - x_no_const = x.value.reshape(-1, 1) - val = prob.value - return val, x_no_const + """ + Solving + + min xAx - 2ax + s.t. xSigmax \leq s + A, Sigma psd. + + :param A: + :param a: + :param s: + :param Sigma: + :return: + """ + n = A.shape[0] + + if Sigma is None: + I = np.eye(n) + Sigma = I + + x = cp.Variable(n) + objective = cp.Minimize(cp.quad_form(x, A) - 2 * x @ a) + zero = np.zeros(n) + # constraints = [ cp.SOC(zero@x + np.array([np.sqrt(s)]), Sigma @ x)] + constraints = [cp.quad_form(x, Sigma) <= s] + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-8, + mosek.dparam.intpnt_co_tol_dfeas: 1e-8, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-8, + }, + verbose=verbose, + ) + + x_no_const = x.value.reshape(-1, 1) + val = prob.value + return val, x_no_const def QCQP_problem(A, a, s, Sigma=None, threads=4, verbose=False): - """ - Solving - - min xAx - 2ax - s.t. 
xSigmax == s - - - :param A: - :param a: - :param s: - :param Sigma: - :return: - """ - n = A.shape[0] - lam = cp.Variable(1) - if Sigma is None: - I = np.eye(n) - Sigma = I - - objective = cp.Maximize(lam * s - cp.matrix_frac(a, A - lam * Sigma)) - constraints = [A - lam * Sigma >> 0] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-12, - mosek.dparam.intpnt_co_tol_dfeas: 1e-12, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-12}, - verbose=verbose) - - x_no_const = np.linalg.solve(A - lam.value * Sigma, a) - val = prob.value - return val, x_no_const + """ + Solving + + min xAx - 2ax + s.t. xSigmax == s + + + :param A: + :param a: + :param s: + :param Sigma: + :return: + """ + n = A.shape[0] + lam = cp.Variable(1) + if Sigma is None: + I = np.eye(n) + Sigma = I + + objective = cp.Maximize(lam * s - cp.matrix_frac(a, A - lam * Sigma)) + constraints = [A - lam * Sigma >> 0] + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-12, + mosek.dparam.intpnt_co_tol_dfeas: 1e-12, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-12, + }, + verbose=verbose, + ) + + x_no_const = np.linalg.solve(A - lam.value * Sigma, a) + val = prob.value + return val, x_no_const def solve_mpi(Q, c, tau, verbose=True, up=None, low=None, xwarm=None): - """ - Solve MIP program - - - """ - if verbose == True: - print("Starting Acq. Fucn solver...") - print("Resolution: ") - # Grid - - # tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n - N = tau.size()[0] - d = Q.size()[0] - s = torch.ones(N) - Tau = torch.zeros(size=(d, d * N), dtype=torch.float64) - S = torch.zeros(size=(d, d * N), dtype=torch.float64) - - for j in range(d): - Tau[j, j * N:(j + 1) * N] = tau - S[j, j * N:(j + 1) * N] = s - - B = Q @ Tau - - if (up is not None) or (low is not None): - G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c))) - h = torch.ones(4 * d + 2) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - h[4 * d] = up - h[4 * d + 1] = -low - else: - G = torch.cat((B, -B, S, -S)) - h = torch.ones(4 * d) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - # Indicator variables - - x = cp.Variable(d * N, boolean=True) - if xwarm is not None: - x.value = xwarm.detach().numpy() - c = c.view(-1).detach().numpy() - - objective = cp.Minimize(-c * x) - constraints = [0 <= x, x <= 1, G.detach().numpy() * x <= h.view(-1).detach().numpy()] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True) - - # print (x.value) - - return (torch.from_numpy(Tau.numpy() @ x.value), np.dot(c, x.value)) + """ + Solve MIP program + + + """ + if verbose == True: + print("Starting Acq. 
Fucn solver...") + print("Resolution: ") + # Grid + + # tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n + N = tau.size()[0] + d = Q.size()[0] + s = torch.ones(N) + Tau = torch.zeros(size=(d, d * N), dtype=torch.float64) + S = torch.zeros(size=(d, d * N), dtype=torch.float64) + + for j in range(d): + Tau[j, j * N : (j + 1) * N] = tau + S[j, j * N : (j + 1) * N] = s + + B = Q @ Tau + + if (up is not None) or (low is not None): + G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c))) + h = torch.ones(4 * d + 2) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + h[4 * d] = up + h[4 * d + 1] = -low + else: + G = torch.cat((B, -B, S, -S)) + h = torch.ones(4 * d) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + # Indicator variables + + x = cp.Variable(d * N, boolean=True) + if xwarm is not None: + x.value = xwarm.detach().numpy() + c = c.view(-1).detach().numpy() + + objective = cp.Minimize(-c * x) + constraints = [ + 0 <= x, + x <= 1, + G.detach().numpy() * x <= h.view(-1).detach().numpy(), + ] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True) + + # print (x.value) + + return (torch.from_numpy(Tau.numpy() @ x.value), np.dot(c, x.value)) def newton_solve(f, x0, eps=1e-3, maxiter=100, verbose=False, grad=None): - """ - >>> newton_solve(lambda x: x**2,torch.Tensor([2.0,1.0]).double().view(-1)) - tensor([0., 0.], dtype=torch.float64) - """ - lam = 1. - d = int(x0.size()[0]) - x0.requires_grad_(True) - x = torch.zeros(size=(d, 1), requires_grad=True).view(-1).double() - x.data = x0.data - res = f(x) ** 2 - i = 0 - s = 1. - - while torch.max(res) > eps and i < maxiter: - i = i + 1 - - if grad is None: - nabla_f = torch.autograd.functional.jacobian(f, x, strict=True) - else: - nabla_f = grad(x) - - if verbose: - print(i, "err:", torch.max(res), s) - print(nabla_f.size()) - print("-----------------------") - - xn = x - torch.linalg.solve(nabla_f + torch.eye(d).double() * s, f(x).view(-1, 1)).view(-1) - resn = f(xn) ** 2 - - if torch.max(resn) < torch.max(res): - x = xn.requires_grad_(True) - # lam = np.minimum(lam * 2,1) - s = s / 2 - res = resn - - else: - s = s * 2 - # lam = lam /2. - return x + """ + >>> newton_solve(lambda x: x**2,torch.Tensor([2.0,1.0]).double().view(-1)) + tensor([0., 0.], dtype=torch.float64) + """ + lam = 1.0 + d = int(x0.size()[0]) + x0.requires_grad_(True) + x = torch.zeros(size=(d, 1), requires_grad=True).view(-1).double() + x.data = x0.data + res = f(x) ** 2 + i = 0 + s = 1.0 + + while torch.max(res) > eps and i < maxiter: + i = i + 1 + + if grad is None: + nabla_f = torch.autograd.functional.jacobian(f, x, strict=True) + else: + nabla_f = grad(x) + + if verbose: + print(i, "err:", torch.max(res), s) + print(nabla_f.size()) + print("-----------------------") + + xn = x - torch.linalg.solve( + nabla_f + torch.eye(d).double() * s, f(x).view(-1, 1) + ).view(-1) + resn = f(xn) ** 2 + + if torch.max(resn) < torch.max(res): + x = xn.requires_grad_(True) + # lam = np.minimum(lam * 2,1) + s = s / 2 + res = resn + + else: + s = s * 2 + # lam = lam /2. 
+ return x def matrix_recovery_hermitian_trace_regression(X, b, eps=1e-5): - """ + """ - :param X: list of matrices - :param b: vector of resposnes - :param eps: constraint tolerance - :return: reocvered matrix - """ + :param X: list of matrices + :param b: vector of resposnes + :param eps: constraint tolerance + :return: reocvered matrix + """ - d = X[0].shape[0] - N = len(X) - Z = cp.Variable((d, d), symmetric=True) + d = X[0].shape[0] + N = len(X) + Z = cp.Variable((d, d), symmetric=True) - constraints = [Z >> 0] - constraints += [ - cp.trace(X[i] @ Z) >= b[i] - eps for i in range(N) - ] - constraints += [ - cp.trace(X[i] @ Z) <= b[i] + eps for i in range(N) - ] + constraints = [Z >> 0] + constraints += [cp.trace(X[i] @ Z) >= b[i] - eps for i in range(N)] + constraints += [cp.trace(X[i] @ Z) <= b[i] + eps for i in range(N)] - prob = cp.Problem(cp.Minimize(cp.norm(Z, "nuc")), - constraints) + prob = cp.Problem(cp.Minimize(cp.norm(Z, "nuc")), constraints) - prob.solve() + prob.solve() - return Z.value + return Z.value if __name__ == "__main__": - newton_solve(lambda x: x ** 2, torch.Tensor([2.0, 1.0]).double().view(-1), verbose=True) + newton_solve( + lambda x: x**2, torch.Tensor([2.0, 1.0]).double().view(-1), verbose=True + ) diff --git a/stpy/optim/frank_wolfe.py b/stpy/optim/frank_wolfe.py index ebee977..0e303e4 100644 --- a/stpy/optim/frank_wolfe.py +++ b/stpy/optim/frank_wolfe.py @@ -3,57 +3,57 @@ from scipy.optimize import minimize_scalar -def step_frank_wolfe_simplex(F, nablaF, x, step_size = 'opt'): - d = x.shape[0] - nabla = nablaF(x) - index = np.argmax(nabla) - e = np.zeros(d) - e[index] = 1. - if step_size == 'opt': - fn = lambda h: -F(x * h + (1 - h) * e) - res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method='bounded') - gamma = res.x - else: - gamma = 1. - x = x * gamma + (1 - gamma) * e - return x - - -def step_exponential_gradient_descent(nablaF, x, eta=1.): - """ - - :param nablaF: - :param x: - :param eta: - :return: - """ - x = x * torch.exp(eta * nablaF(x)) - x = x / torch.sum(x) - return x +def step_frank_wolfe_simplex(F, nablaF, x, step_size="opt"): + d = x.shape[0] + nabla = nablaF(x) + index = np.argmax(nabla) + e = np.zeros(d) + e[index] = 1.0 + if step_size == "opt": + fn = lambda h: -F(x * h + (1 - h) * e) + res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method="bounded") + gamma = res.x + else: + gamma = 1.0 + x = x * gamma + (1 - gamma) * e + return x + + +def step_exponential_gradient_descent(nablaF, x, eta=1.0): + """ + + :param nablaF: + :param x: + :param eta: + :return: + """ + x = x * torch.exp(eta * nablaF(x)) + x = x / torch.sum(x) + return x def step_wa_simlex(F, nablaF, x, optimality): - d = x.shape[0] - nabla = nablaF(x) - e_plus = np.max(nabla) - e_minus = np.min(nabla) - i_minus = np.argmin(nabla) - i_plus = np.argmax(nabla) - e = np.zeros(d) - - if (e_plus - optimality) / optimality > (optimality - e_minus) / optimality: - index = i_plus - e[index] = 1. - fn = lambda h: -F(x * h + (1 - h) * e) - res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method='bounded') - gamma = res.x - x = x * gamma + (1. - gamma) * e - else: - index = i_minus - e[index] = 1. 
- fn = lambda h: -F((x + h * e) / (1 + h)) - # res = minimize_scalar(fn,bounds=(0.,1/(1-x[index])),method='bounded') - res = minimize_scalar(fn, bounds=(-x[index], 1 - x[index]), method='bounded') - gamma = res.x - x = (x + gamma * e) / (1 + gamma) - return x + d = x.shape[0] + nabla = nablaF(x) + e_plus = np.max(nabla) + e_minus = np.min(nabla) + i_minus = np.argmin(nabla) + i_plus = np.argmax(nabla) + e = np.zeros(d) + + if (e_plus - optimality) / optimality > (optimality - e_minus) / optimality: + index = i_plus + e[index] = 1.0 + fn = lambda h: -F(x * h + (1 - h) * e) + res = minimize_scalar(fn, bounds=(10e-8, 1 - 10e-8), method="bounded") + gamma = res.x + x = x * gamma + (1.0 - gamma) * e + else: + index = i_minus + e[index] = 1.0 + fn = lambda h: -F((x + h * e) / (1 + h)) + # res = minimize_scalar(fn,bounds=(0.,1/(1-x[index])),method='bounded') + res = minimize_scalar(fn, bounds=(-x[index], 1 - x[index]), method="bounded") + gamma = res.x + x = (x + gamma * e) / (1 + gamma) + return x diff --git a/stpy/optim/hyper_parameter_opt.py b/stpy/optim/hyper_parameter_opt.py index aa25b28..33c1eed 100755 --- a/stpy/optim/hyper_parameter_opt.py +++ b/stpy/optim/hyper_parameter_opt.py @@ -6,127 +6,147 @@ class HyperParameterOpt: - def __init__(self, obj, x, y, fun, params): - - self.mode = obj - self.x = x - self.y = y - self.fun = fun - self.params = params - - def optimize(self, type, optimizer, restarts): - - ## Bandwidth optimization - def bandwidth_opt(X): - gamma = X - Rot = torch.eye(self.x.size()[1], dtype=torch.float64) - return self.log_marginal_likelihood(gamma, Rot, 1.0, kernel=" ") - - def bandwidth_opt_handler(): - manifold = Euclidean(self.kernel_object.gamma.size()[0]) - C = CostFunction(bandwidth_opt, number_args=1) - xinit = lambda: np.random.randn() ** 2 + np.abs( - torch.zeros(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy()) - return optimize(manifold, C, 1, xinit) - - def bandwidth_kappa_opt(X): - gamma = X[0] - kappa = X[1] - Rot = torch.eye(self.x.size()[1], dtype=torch.float64) - return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ") - - def bandwidth_kappa_opt_handler(): - manifold1 = Euclidean(self.kernel_object.gamma.size()[0]) - manifold2 = Euclidean(1) - manifold = Product((manifold1, manifold2)) - C = CostFunction(bandwidth_kappa_opt, number_args=2) - xinit = lambda x: [torch.randn(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy(), - np.abs(torch.randn(1, dtype=torch.float64).numpy())] - return optimize(manifold, C, 2, xinit) - - ## Rotations optimization - def rotations_opt(X): - Rot = X - return self.log_marginal_likelihood(self.kernel_object.gamma, Rot, self.kernel_object.kappa, kernel=" ") - - def rotations_opt_handler(): - rots = Rotations(self.kernel_object.gamma.size()[0]) - manifold = rots - xinit = lambda: torch.qr(torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64))[0].numpy() - C = CostFunction(rotations_opt, number_args=1) - return optimize(manifold, C, 1, xinit) - - ## Bandwidth and Rotations optimization - def bandwith_rotations_opt(X): - gamma = X[0] - Rot = X[1] - return self.log_marginal_likelihood(gamma, Rot, 0.1, kernel=" ") - - def bandwidth_rotations_opt_handler(): - eucl = Euclidean(self.kernel_object.gamma.size()[0]) - rots = Rotations(self.kernel_object.gamma.size()[0]) - manifold = Product((eucl, rots)) - xinit = lambda: [torch.randn(self.kernel_object.gamma.size()[0], dtype=torch.float64).numpy(), - torch.qr(torch.randn(self.x.size()[1], self.x.size()[1], 
dtype=torch.float64))[0].numpy()] - C = CostFunction(bandwith_rotations_opt, number_args=2) - return optimize(manifold, C, 2, xinit) - - ## Bandwidth and Rotations optimization - def bandwith_kappa_rotations_opt(X): - gamma = X[0] - kappa = X[1] - Rot = X[2] - return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ") - - def bandwidth_kappa_rotations_opt_handler(): - eucl = Euclidean(self.kernel_object.gamma.size()[0]) - eucl2 = Euclidean(1) - rots = Rotations(self.kernel_object.gamma.size()[0]) - manifold = Product((eucl, eucl2, rots)) - xinit = [self.kernel_object.gamma.numpy(), torch.eye(self.x.size()[1], dtype=torch.float64).numpy()] - C = CostFunction(bandwith_kappa_rotations_opt, number_args=2) - return optimize(manifold, C, 2, xinit) - - # Finalize - if type == "bandwidth": - best_params = bandwidth_opt_handler() - self.kernel_object.gamma = torch.abs(best_params[0]).detach() - - elif type == "rots": - best_params = rotations_opt_handler() - Rot = best_params[0].detach() - print("Rotation:", Rot) - self.Rot = Rot - self.x = torch.mm(self.x, Rot).detach() - - elif type == "bandwidth+kappa": - best_params = bandwidth_kappa_opt_handler() - self.kernel_object.gamma = torch.abs(best_params[0]).detach() - self.s = torch.abs(best_params[1]).detach() - - elif type == "bandwidth+rots": - best_params = bandwidth_rotations_opt_handler() - self.kernel_object.gamma = torch.abs(best_params[0]).detach() - Rot = best_params[1].detach() - print("Rotation:", Rot) - self.Rot = Rot - self.x = torch.mm(self.x, Rot).detach() - - elif type == "bandwidth+kappa+rots": - best_params = bandwidth_kappa_rotations_opt_handler() - self.kernel_object.gamma = torch.abs(best_params[0]).detach() - self.s = torch.abs(best_params[1]).detach() - Rot = best_params[2].detach() - print("Rotation:", Rot) - self.Rot = Rot - self.x = torch.mm(self.x, Rot).detach() - - else: - raise AttributeError("Optimization scheme not implemented") - - self.back_prop = False - self.fit = False - self.fit_gp(self.x, self.y) - print(self.description()) - - return True + def __init__(self, obj, x, y, fun, params): + + self.mode = obj + self.x = x + self.y = y + self.fun = fun + self.params = params + + def optimize(self, type, optimizer, restarts): + + ## Bandwidth optimization + def bandwidth_opt(X): + gamma = X + Rot = torch.eye(self.x.size()[1], dtype=torch.float64) + return self.log_marginal_likelihood(gamma, Rot, 1.0, kernel=" ") + + def bandwidth_opt_handler(): + manifold = Euclidean(self.kernel_object.gamma.size()[0]) + C = CostFunction(bandwidth_opt, number_args=1) + xinit = lambda: np.random.randn() ** 2 + np.abs( + torch.zeros( + self.kernel_object.gamma.size()[0], dtype=torch.float64 + ).numpy() + ) + return optimize(manifold, C, 1, xinit) + + def bandwidth_kappa_opt(X): + gamma = X[0] + kappa = X[1] + Rot = torch.eye(self.x.size()[1], dtype=torch.float64) + return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ") + + def bandwidth_kappa_opt_handler(): + manifold1 = Euclidean(self.kernel_object.gamma.size()[0]) + manifold2 = Euclidean(1) + manifold = Product((manifold1, manifold2)) + C = CostFunction(bandwidth_kappa_opt, number_args=2) + xinit = lambda x: [ + torch.randn( + self.kernel_object.gamma.size()[0], dtype=torch.float64 + ).numpy(), + np.abs(torch.randn(1, dtype=torch.float64).numpy()), + ] + return optimize(manifold, C, 2, xinit) + + ## Rotations optimization + def rotations_opt(X): + Rot = X + return self.log_marginal_likelihood( + self.kernel_object.gamma, Rot, self.kernel_object.kappa, kernel=" " + ) 
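+        # Each *_opt helper in this method fixes the remaining hyper-parameters
+        # and evaluates the marginal-likelihood objective; the matching
+        # *_opt_handler builds the manifold for the free parameters, wraps the
+        # objective in a CostFunction and hands it, together with an initializer,
+        # to the manifold optimizer.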
+ + def rotations_opt_handler(): + rots = Rotations(self.kernel_object.gamma.size()[0]) + manifold = rots + xinit = lambda: torch.qr( + torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64) + )[0].numpy() + C = CostFunction(rotations_opt, number_args=1) + return optimize(manifold, C, 1, xinit) + + ## Bandwidth and Rotations optimization + def bandwith_rotations_opt(X): + gamma = X[0] + Rot = X[1] + return self.log_marginal_likelihood(gamma, Rot, 0.1, kernel=" ") + + def bandwidth_rotations_opt_handler(): + eucl = Euclidean(self.kernel_object.gamma.size()[0]) + rots = Rotations(self.kernel_object.gamma.size()[0]) + manifold = Product((eucl, rots)) + xinit = lambda: [ + torch.randn( + self.kernel_object.gamma.size()[0], dtype=torch.float64 + ).numpy(), + torch.qr( + torch.randn(self.x.size()[1], self.x.size()[1], dtype=torch.float64) + )[0].numpy(), + ] + C = CostFunction(bandwith_rotations_opt, number_args=2) + return optimize(manifold, C, 2, xinit) + + ## Bandwidth and Rotations optimization + def bandwith_kappa_rotations_opt(X): + gamma = X[0] + kappa = X[1] + Rot = X[2] + return self.log_marginal_likelihood(gamma, Rot, kappa, kernel=" ") + + def bandwidth_kappa_rotations_opt_handler(): + eucl = Euclidean(self.kernel_object.gamma.size()[0]) + eucl2 = Euclidean(1) + rots = Rotations(self.kernel_object.gamma.size()[0]) + manifold = Product((eucl, eucl2, rots)) + xinit = [ + self.kernel_object.gamma.numpy(), + torch.eye(self.x.size()[1], dtype=torch.float64).numpy(), + ] + C = CostFunction(bandwith_kappa_rotations_opt, number_args=2) + return optimize(manifold, C, 2, xinit) + + # Finalize + if type == "bandwidth": + best_params = bandwidth_opt_handler() + self.kernel_object.gamma = torch.abs(best_params[0]).detach() + + elif type == "rots": + best_params = rotations_opt_handler() + Rot = best_params[0].detach() + print("Rotation:", Rot) + self.Rot = Rot + self.x = torch.mm(self.x, Rot).detach() + + elif type == "bandwidth+kappa": + best_params = bandwidth_kappa_opt_handler() + self.kernel_object.gamma = torch.abs(best_params[0]).detach() + self.s = torch.abs(best_params[1]).detach() + + elif type == "bandwidth+rots": + best_params = bandwidth_rotations_opt_handler() + self.kernel_object.gamma = torch.abs(best_params[0]).detach() + Rot = best_params[1].detach() + print("Rotation:", Rot) + self.Rot = Rot + self.x = torch.mm(self.x, Rot).detach() + + elif type == "bandwidth+kappa+rots": + best_params = bandwidth_kappa_rotations_opt_handler() + self.kernel_object.gamma = torch.abs(best_params[0]).detach() + self.s = torch.abs(best_params[1]).detach() + Rot = best_params[2].detach() + print("Rotation:", Rot) + self.Rot = Rot + self.x = torch.mm(self.x, Rot).detach() + + else: + raise AttributeError("Optimization scheme not implemented") + + self.back_prop = False + self.fit = False + self.fit_gp(self.x, self.y) + print(self.description()) + + return True diff --git a/stpy/optim/manifold_optimization.py b/stpy/optim/manifold_optimization.py index acc25c9..3ade0eb 100644 --- a/stpy/optim/manifold_optimization.py +++ b/stpy/optim/manifold_optimization.py @@ -5,41 +5,50 @@ def optimize(manifold, cost_function, number_args, sampling_func, optimizer, restarts): - [cost_numpy, egrad_numpy, ehess_numpy] = cost_function.define() - - if optimizer == "pymanopt": - problem = Problem(manifold=manifold, cost=cost_numpy, egrad=egrad_numpy, ehess=ehess_numpy, verbosity=1) - solver = SteepestDescent(maxiter=100, mingradnorm=1e-8, minstepsize=1e-10) - - def solve(problem, x=None): - return 
solver.solve(problem, x=x) - - elif optimizer == "scipy": - problem = None - - def solve(problem, x=None): - res = minimize(cost_numpy, xinit, method="L-BFGS-B", jac=egrad_numpy, tol=0.0001) - return res.x - else: - raise NotImplementedError - - # optimization - repeats = restarts - best = 10e10 - best_params = [i for i in range(number_args)] - - for _ in range(repeats): - xinit = sampling_func() - # try: - Xopt = solve(problem, x=xinit) - print(xinit) - cost = cost_numpy(Xopt) - print("Run:", _, " cost: ", cost) - if cost < best: - best = cost - if len(best_params) > 1: - for j in range(number_args): - best_params[j] = torch.from_numpy(Xopt[j]) - else: - best_params[0] = torch.from_numpy(Xopt) - return best_params + [cost_numpy, egrad_numpy, ehess_numpy] = cost_function.define() + + if optimizer == "pymanopt": + problem = Problem( + manifold=manifold, + cost=cost_numpy, + egrad=egrad_numpy, + ehess=ehess_numpy, + verbosity=1, + ) + solver = SteepestDescent(maxiter=100, mingradnorm=1e-8, minstepsize=1e-10) + + def solve(problem, x=None): + return solver.solve(problem, x=x) + + elif optimizer == "scipy": + problem = None + + def solve(problem, x=None): + res = minimize( + cost_numpy, xinit, method="L-BFGS-B", jac=egrad_numpy, tol=0.0001 + ) + return res.x + + else: + raise NotImplementedError + + # optimization + repeats = restarts + best = 10e10 + best_params = [i for i in range(number_args)] + + for _ in range(repeats): + xinit = sampling_func() + # try: + Xopt = solve(problem, x=xinit) + print(xinit) + cost = cost_numpy(Xopt) + print("Run:", _, " cost: ", cost) + if cost < best: + best = cost + if len(best_params) > 1: + for j in range(number_args): + best_params[j] = torch.from_numpy(Xopt[j]) + else: + best_params[0] = torch.from_numpy(Xopt) + return best_params diff --git a/stpy/point_processes/binomial/binomial_process.py b/stpy/point_processes/binomial/binomial_process.py index ddbf89e..f3ce7f2 100644 --- a/stpy/point_processes/binomial/binomial_process.py +++ b/stpy/point_processes/binomial/binomial_process.py @@ -1,38 +1,38 @@ import torch -class BernoulliPointProcess(): +class BernoulliPointProcess: - def __init__(self, basic_sets, d=1, rate=None): - self.basic_sets = basic_sets - self.rate = rate - self.d = d + def __init__(self, basic_sets, d=1, rate=None): + self.basic_sets = basic_sets + self.rate = rate + self.d = d - def is_basic(self, S): - """ - :return: - """ - for set in self.basic_sets: - if hash(set) == hash(S): - return True - return False + def is_basic(self, S): + """ + :return: + """ + for set in self.basic_sets: + if hash(set) == hash(S): + return True + return False - def sample(self, S, t=None, dt=None): - if self.is_basic(S): - rv = torch.bernoulli(self.rate(S)) - if rv > 0.5: - return (S, 1., 1., dt, t) - else: - return (S, 0., 1., dt, t) - else: - # iterate over all sets that contain it - outcome = 0. - for set in self.basic_sets: - if S.inside(set): - rv = float(torch.bernoulli(self.rate(S))) - outcome = max(rv, 0.) 
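The reformatted optimize() in stpy/optim/manifold_optimization.py above restarts a local solver from random initial points and keeps the parameters with the lowest cost. A minimal standalone sketch of that keep-the-best pattern with SciPy's L-BFGS-B on a toy quadratic cost (names and cost are illustrative only; no pymanopt manifolds or stpy CostFunction are involved):

import numpy as np
from scipy.optimize import minimize

def restarted_minimize(cost, grad, sampling_func, restarts=5):
    # Re-run a local optimizer from random initializations and keep the best run,
    # mirroring the restart loop in optimize() above.
    best_val, best_x = np.inf, None
    for _ in range(restarts):
        x0 = sampling_func()
        res = minimize(cost, x0, jac=grad, method="L-BFGS-B", tol=1e-4)
        if res.fun < best_val:
            best_val, best_x = res.fun, res.x
    return best_x, best_val

cost = lambda x: float(np.sum((x - 1.0) ** 2))
grad = lambda x: 2.0 * (x - 1.0)
x_best, val_best = restarted_minimize(cost, grad, lambda: np.random.randn(3))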
- if outcome > 0.5: - return (S, 1., 1., dt, t) - else: - return (S, 0., 1., dt, t) - pass + def sample(self, S, t=None, dt=None): + if self.is_basic(S): + rv = torch.bernoulli(self.rate(S)) + if rv > 0.5: + return (S, 1.0, 1.0, dt, t) + else: + return (S, 0.0, 1.0, dt, t) + else: + # iterate over all sets that contain it + outcome = 0.0 + for set in self.basic_sets: + if S.inside(set): + rv = float(torch.bernoulli(self.rate(S))) + outcome = max(rv, 0.0) + if outcome > 0.5: + return (S, 1.0, 1.0, dt, t) + else: + return (S, 0.0, 1.0, dt, t) + pass diff --git a/stpy/point_processes/binomial/binomial_process_estimator.py b/stpy/point_processes/binomial/binomial_process_estimator.py index bf88e90..48b8ec7 100644 --- a/stpy/point_processes/binomial/binomial_process_estimator.py +++ b/stpy/point_processes/binomial/binomial_process_estimator.py @@ -12,473 +12,653 @@ class BernoulliRateEstimator(RateEstimator): - """ - without link function, but with inequality constraints - """ - - def __init__(self, hierarchy, d=1, m=100, kernel_object=None, B=1., s=1., jitter=10e-8, b=0., basis='triangle', - offset=0.1, uncertainty='laplace'): - - self.d = d - self.s = s - self.b = b - self.B = B - self.uncertainty = uncertainty - self.hierarchy = hierarchy - self.kernel_object = kernel_object - self.packing = TriangleEmbedding(d, m, kernel_object=kernel_object, B=1., b=0., offset=offset, - s=np.sqrt(jitter)) - self.feedback = "histogram" - self.data = None - - self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) - self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double() - - for index_set, set in enumerate(self.basic_sets): - self.varphis[index_set, :] = self.embed_set(set) - - def embed_set(self, S): - return self.packing.integral(S).view(1, -1) - - def load_data(self, data): - """ - - :param data: (S, no_events, out_of, duration, time) - :return: - """ - self.data = [] - self.phis = None - for datapoint in data: - self.add_data_point(datapoint) - - def add_data_point(self, datapoint): - - if self.data is None: - self.load_data([datapoint]) - else: - - # add - self.data.append(datapoint) - - S, count, pool, duration, time = datapoint - phi = self.embed_set(S) - - if self.phis is not None: - self.counts = torch.cat((self.counts, torch.Tensor([count]))) - self.pool = torch.cat((self.pool, torch.Tensor([pool]))) - self.phis = torch.cat((self.phis, phi), dim=0) - else: - self.counts = torch.Tensor([count]).double() - self.pool = torch.Tensor([pool]).double() - self.phis = phi - - def nabla(self, theta): - # defining objective - if self.data is not None: - return - torch.einsum('i,ij,i->j', self.counts, self.phis, 1. / (self.phis @ theta).view(-1)).view(-1, 1) + \ - torch.einsum('i,ij,i->j', self.pool - self.counts, self.phis, - 1. / (1. - self.phis @ theta).view(-1)).view(-1, 1) \ - + self.s * theta.view(-1, 1) - else: - return self.s * theta.view(-1, 1) - - def sample(self, steps=10, verbose=False): - """ - Langevin dynamics to sample from constrained GP prior - - :param steps: Number of iterations - :return: - """ - l = np.zeros(shape=(len(self.basic_sets))) - u = np.zeros(shape=(len(self.basic_sets))) + 1. 
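BernoulliPointProcess.sample() above and the estimator that follows exchange observations as 5-tuples of the form (S, no_events, out_of, duration, time). A toy sketch of producing one such tuple for a single region with a known success probability (the string placeholder stands in for a BorelSet):

import torch

def sample_once(region, p, dt=None, t=None):
    # One Bernoulli draw with success probability p, packaged in the
    # (S, no_events, out_of, duration, time) format consumed by load_data().
    outcome = float(torch.bernoulli(torch.tensor(p, dtype=torch.float64)))
    return (region, outcome, 1.0, dt, t)

datapoint = sample_once("toy-region", p=0.3)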
- - # prox operator - def prox(x): - res = solve_qp(np.eye(self.get_m()), x.numpy().reshape(-1), - C=np.vstack((-self.varphis.numpy(), self.varphis.numpy())).T, - b=np.hstack((-u, l)), factorized=True) - return torch.from_numpy(res[0]).view(-1, 1) - - # initialization - if self.rate is not None: - theta = self.rate.view(-1, 1) - else: - theta = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False).view( - -1, 1) ** 2 - - # loop - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() - - # calculate proper step-size - W = self.construct_covariance(theta=theta) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - eta = 0.5 / L - - theta = 0.5 * theta - eta * self.nabla(theta) + 0.5 * prox(theta) + np.sqrt(2 * eta) * w - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = prox(theta) - - def construct_covariance(self, theta): - D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2) - D2 = torch.diag((self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2) - - W = self.phis.T @ (D1 + D2) @ self.phis + self.s * torch.eye(self.get_m()).double() - return W - - def construct_confidence(self): - self.W = self.construct_covariance(self.rate) - self.invW = torch.pinverse(self.W) - - def construct_likelihood_ratio(self, method='full'): - # for data - phis = self.phis.numpy() - counts = self.counts.numpy() - - # for constraints - varphis = self.varphis.numpy() - - # current fit - mean_theta = self.rate.numpy() - - if method == 'split': - pass - elif method == 'full': - self.likelihood = - counts @ np.log(phis @ mean_theta) - (1 - counts) @ np.log(1 - phis @ mean_theta) \ - + self.s * 0.5 * np.sum(mean_theta - 0.5) ** 2 - elif method == 'cv': - pass - - def ucb(self, S, beta=8., delta=0.1): - if self.uncertainty == 'laplace': - ucb = self.embed_set(S) @ self.rate + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T - return torch.minimum(torch.Tensor([[1.]]).double(), ucb) - - elif self.uncertainty == "ratio": - phi = self.embed_set(S) - phis = self.phis.numpy() - varphis = self.varphis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - - objective = cp.Maximize(phi @ theta) - - v = np.log(1. / delta) + self.likelihood - constraints = [- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta) - + self.s * 0.5 * cp.sum_squares(theta - 0.5) <= v] - - # every set has probability between 0-1. - constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) - constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - return torch.minimum(torch.Tensor([[1.]]).double(), torch.from_numpy(np.array(prob.value))) - - def lcb(self, S, beta=8., delta=0.1): - if self.uncertainty == 'laplace': - lcb = self.embed_set(S) @ self.rate - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T - return torch.maximum(torch.Tensor([[0.]]).double(), lcb) - - elif self.uncertainty == "ratio": - phi = self.embed_set(S) - phis = self.phis.numpy() - varphis = self.varphis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - - objective = cp.Minimize(phi @ theta) - v = np.log(1. 
/ delta) + self.likelihood - constraints = [- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta) - + self.s * 0.5 * cp.sum_squares(theta - 0.5) <= v] - - # every set has probability between 0-1. - constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) - constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - - return torch.maximum(torch.Tensor([[0.]]).double(), torch.from_numpy(np.array(prob.value))) - - def fit_gp(self, threads=4): - - phis = self.phis.numpy() - varphis = self.varphis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - objective = cp.Minimize(- counts @ cp.log(phis @ theta) - (1 - counts) @ cp.log(1 - phis @ theta) - + self.s * 0.5 * cp.sum_squares(theta - 0.5)) - - # probability constraints - constraints = [] - - # every set has probability between 0-1. - constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) - constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - self.rate = torch.from_numpy(theta.value) - return self.rate + """ + without link function, but with inequality constraints + """ + + def __init__( + self, + hierarchy, + d=1, + m=100, + kernel_object=None, + B=1.0, + s=1.0, + jitter=10e-8, + b=0.0, + basis="triangle", + offset=0.1, + uncertainty="laplace", + ): + + self.d = d + self.s = s + self.b = b + self.B = B + self.uncertainty = uncertainty + self.hierarchy = hierarchy + self.kernel_object = kernel_object + self.packing = TriangleEmbedding( + d, + m, + kernel_object=kernel_object, + B=1.0, + b=0.0, + offset=offset, + s=np.sqrt(jitter), + ) + self.feedback = "histogram" + self.data = None + + self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) + self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double() + + for index_set, set in enumerate(self.basic_sets): + self.varphis[index_set, :] = self.embed_set(set) + + def embed_set(self, S): + return self.packing.integral(S).view(1, -1) + + def load_data(self, data): + """ + + :param data: (S, no_events, out_of, duration, time) + :return: + """ + self.data = [] + self.phis = None + for datapoint in data: + self.add_data_point(datapoint) + + def add_data_point(self, datapoint): + + if self.data is None: + self.load_data([datapoint]) + else: + + # add + self.data.append(datapoint) + + S, count, pool, duration, time = datapoint + phi = self.embed_set(S) + + if self.phis is not None: + self.counts = torch.cat((self.counts, torch.Tensor([count]))) + self.pool = torch.cat((self.pool, torch.Tensor([pool]))) + self.phis = torch.cat((self.phis, phi), dim=0) + else: + self.counts = torch.Tensor([count]).double() + self.pool = torch.Tensor([pool]).double() + self.phis = phi + + def nabla(self, theta): + # defining objective + if self.data is not None: + return ( + -torch.einsum( + "i,ij,i->j", + self.counts, + self.phis, + 1.0 / (self.phis @ 
theta).view(-1), + ).view(-1, 1) + + torch.einsum( + "i,ij,i->j", + self.pool - self.counts, + self.phis, + 1.0 / (1.0 - self.phis @ theta).view(-1), + ).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + else: + return self.s * theta.view(-1, 1) + + def sample(self, steps=10, verbose=False): + """ + Langevin dynamics to sample from constrained GP prior + + :param steps: Number of iterations + :return: + """ + l = np.zeros(shape=(len(self.basic_sets))) + u = np.zeros(shape=(len(self.basic_sets))) + 1.0 + + # prox operator + def prox(x): + res = solve_qp( + np.eye(self.get_m()), + x.numpy().reshape(-1), + C=np.vstack((-self.varphis.numpy(), self.varphis.numpy())).T, + b=np.hstack((-u, l)), + factorized=True, + ) + return torch.from_numpy(res[0]).view(-1, 1) + + # initialization + if self.rate is not None: + theta = self.rate.view(-1, 1) + else: + theta = ( + self.b + + 0.05 + * torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False + ).view(-1, 1) + ** 2 + ) + + # loop + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() + + # calculate proper step-size + W = self.construct_covariance(theta=theta) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + eta = 0.5 / L + + theta = ( + 0.5 * theta + - eta * self.nabla(theta) + + 0.5 * prox(theta) + + np.sqrt(2 * eta) * w + ) + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = prox(theta) + + def construct_covariance(self, theta): + D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2) + D2 = torch.diag( + (self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2 + ) + + W = ( + self.phis.T @ (D1 + D2) @ self.phis + + self.s * torch.eye(self.get_m()).double() + ) + return W + + def construct_confidence(self): + self.W = self.construct_covariance(self.rate) + self.invW = torch.pinverse(self.W) + + def construct_likelihood_ratio(self, method="full"): + # for data + phis = self.phis.numpy() + counts = self.counts.numpy() + + # for constraints + varphis = self.varphis.numpy() + + # current fit + mean_theta = self.rate.numpy() + + if method == "split": + pass + elif method == "full": + self.likelihood = ( + -counts @ np.log(phis @ mean_theta) + - (1 - counts) @ np.log(1 - phis @ mean_theta) + + self.s * 0.5 * np.sum(mean_theta - 0.5) ** 2 + ) + elif method == "cv": + pass + + def ucb(self, S, beta=8.0, delta=0.1): + if self.uncertainty == "laplace": + ucb = ( + self.embed_set(S) @ self.rate + + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T + ) + return torch.minimum(torch.Tensor([[1.0]]).double(), ucb) + + elif self.uncertainty == "ratio": + phi = self.embed_set(S) + phis = self.phis.numpy() + varphis = self.varphis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + + objective = cp.Maximize(phi @ theta) + + v = np.log(1.0 / delta) + self.likelihood + constraints = [ + -counts @ cp.log(phis @ theta) + - (1 - counts) @ cp.log(1 - phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta - 0.5) + <= v + ] + + # every set has probability between 0-1. 
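A quick autograd sanity check (a sketch with synthetic data and hypothetical sizes) that the closed-form expression in nabla() above is the gradient of the penalized Bernoulli negative log-likelihood L(theta) = -sum_i c_i log(phi_i' theta) - sum_i (n_i - c_i) log(1 - phi_i' theta) + (s/2) ||theta||^2:

import torch

torch.manual_seed(0)
m, N, s = 5, 20, 1e-3
phis = torch.rand(N, m, dtype=torch.float64) / m       # keeps phi' theta inside (0, 1)
pool = torch.full((N,), 3.0, dtype=torch.float64)
counts = torch.floor(torch.rand(N, dtype=torch.float64) * (pool + 1.0))
theta = torch.full((m,), 0.5, dtype=torch.float64, requires_grad=True)

loss = (-counts @ torch.log(phis @ theta)
        - (pool - counts) @ torch.log(1.0 - phis @ theta)
        + 0.5 * s * torch.sum(theta ** 2))
loss.backward()

with torch.no_grad():
    closed_form = (-torch.einsum("i,ij,i->j", counts, phis, 1.0 / (phis @ theta))
                   + torch.einsum("i,ij,i->j", pool - counts, phis,
                                  1.0 / (1.0 - phis @ theta))
                   + s * theta)
print(torch.allclose(theta.grad, closed_form))  # expected: True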
+ constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) + constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + return torch.minimum( + torch.Tensor([[1.0]]).double(), torch.from_numpy(np.array(prob.value)) + ) + + def lcb(self, S, beta=8.0, delta=0.1): + if self.uncertainty == "laplace": + lcb = ( + self.embed_set(S) @ self.rate + - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T + ) + return torch.maximum(torch.Tensor([[0.0]]).double(), lcb) + + elif self.uncertainty == "ratio": + phi = self.embed_set(S) + phis = self.phis.numpy() + varphis = self.varphis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + + objective = cp.Minimize(phi @ theta) + v = np.log(1.0 / delta) + self.likelihood + constraints = [ + -counts @ cp.log(phis @ theta) + - (1 - counts) @ cp.log(1 - phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta - 0.5) + <= v + ] + + # every set has probability between 0-1. + constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) + constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + + return torch.maximum( + torch.Tensor([[0.0]]).double(), torch.from_numpy(np.array(prob.value)) + ) + + def fit_gp(self, threads=4): + + phis = self.phis.numpy() + varphis = self.varphis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + objective = cp.Minimize( + -counts @ cp.log(phis @ theta) + - (1 - counts) @ cp.log(1 - phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta - 0.5) + ) + + # probability constraints + constraints = [] + + # every set has probability between 0-1. 
+ constraints.append(varphis @ theta >= np.zeros(varphis.shape[0])) + constraints.append(varphis @ theta <= np.ones(varphis.shape[0])) + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + self.rate = torch.from_numpy(theta.value) + return self.rate class LinkBernoulliRateEstimator(BernoulliRateEstimator): - def construct_covariance(self, theta): - D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2) - D2 = torch.diag((self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2) - - W = self.phis.T @ (D1 + D2) @ self.phis + self.s * torch.eye(self.get_m()).double() - return W - - def log_marginal(self, kernel, X): - func = kernel.get_kernel() - K = func(self.x, self.x, **X) + torch.eye(self.n, dtype=torch.float64) * self.s * self.s - - L = torch.linalg.cholesky(K) - logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) - alpha = torch.solve(self.y, K)[0] - logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet - logprob = -logprob - return logprob - - def construct_likelihood_ratio(self, method='full'): - # for data - phis = self.phis.numpy() - counts = self.counts.numpy() - - # for constraints - varphis = self.varphis.numpy() - - # current fit - mean_theta = self.rate.numpy() - - if method == 'split': - pass - elif method == 'full': - self.likelihood = - counts @ phis @ mean_theta + np.log(1 + np.exp(phis @ mean_theta)) \ - + self.s * 0.5 * np.sum(mean_theta) ** 2 - elif method == 'cv': - pass - - def fit_gp(self, threads=4): - phis = self.phis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - objective = cp.Minimize(-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta)) - + self.s * 0.5 * cp.sum_squares(theta)) - - # probability constraints - constraints = [] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - self.rate = torch.from_numpy(theta.value) - return self.rate - - def link(self, x): - return 1. / (1. + torch.exp(-x)) - - def mean_set(self, S): - return self.link(self.embed_set(S) @ self.rate) - - def ucb(self, S, beta=8., delta=0.1): - if self.uncertainty == "laplace": - ucb = self.embed_set(S) @ self.rate + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T - return self.link(ucb) - elif self.uncertainty == "martingale": - phi = self.embed_set(S) - hat_theta = self.rate.numpy() - - def constraint_value_gradient(theta, beta): - y = cp.Variable(self.get_m()) - v = (theta - hat_theta) - objective2 = cp.Maximize(y @ v - cp.sum(cp.abs(self.phis @ y)) - beta) - - prob = cp.Problem(objective2) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-4, - mosek.dparam.intpnt_co_tol_dfeas: 1e-4, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-4}) - print(prob.status) - return prob.value, y.value - - beta = 2. 
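fit_gp() above is a convex constrained maximum-likelihood problem: a Bernoulli negative log-likelihood in which the success probability of a set is linear in theta, plus a quadratic penalty, subject to every basic set being assigned a probability in [0, 1]. A self-contained sketch of the same program on synthetic data, solved with cvxpy's bundled SCS solver rather than MOSEK (solver choice, sizes and data are stand-ins):

import cvxpy as cp
import numpy as np

rng = np.random.default_rng(0)
m, N, s = 5, 40, 1e-3
phis = rng.uniform(0.0, 1.0 / m, size=(N, m))       # embeddings of observed sets
counts = rng.integers(0, 2, size=N).astype(float)   # 0/1 outcome per observation
varphis = np.eye(m)                                  # stand-in basic-set embeddings

theta = cp.Variable(m)
objective = cp.Minimize(-counts @ cp.log(phis @ theta)
                        - (1.0 - counts) @ cp.log(1.0 - phis @ theta)
                        + 0.5 * s * cp.sum_squares(theta - 0.5))
constraints = [varphis @ theta >= 0.0, varphis @ theta <= 1.0]  # probabilities stay in [0, 1]
cp.Problem(objective, constraints).solve(solver=cp.SCS)
print(theta.value)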
- iters = 10 - gamma = 0.00000001 - theta = hat_theta - print(theta) - - for k in range(iters): - print("Iter:", k) - d = cp.Variable(self.get_m()) - objective = cp.Minimize(phi @ d.T) - val, nabla = constraint_value_gradient(theta, beta) - constraints = [val + nabla.reshape(1, -1) @ d <= 0., cp.sum_squares(d) <= gamma] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) - theta = theta + d.value - print(theta) - - return phi @ theta - - elif self.uncertainty == "ratio": - phi = self.embed_set(S) - phis = self.phis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - - objective = cp.Maximize(phi @ theta) - v = np.log(1. / delta) + self.likelihood - constraints = [-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta)) - + self.s * 0.5 * cp.sum_squares(theta) <= v] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - return self.link(phi @ theta.value) - - def lcb(self, S, beta=8., delta=0.1): - if self.uncertainty == "laplace": - lcb = self.embed_set(S) @ self.rate - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T - return self.link(lcb) - elif self.uncertainty == "ratio": - phi = self.embed_set(S) - phis = self.phis.numpy() - - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - - objective = cp.Minimize(phi @ theta) - v = np.log(1. / delta) + self.likelihood - constraints = [-cp.sum(cp.multiply(counts, phis @ theta)) + cp.sum(cp.logistic(phis @ theta)) - + self.s * 0.5 * cp.sum_squares(theta) <= v] - - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - return self.link(phi @ theta.value) - - def nabla(self, theta): - if self.data is not None: - return -torch.einsum('i,ij->j', self.counts, self.phis).view(-1, 1) + \ - torch.einsum('i,ij,i->j', self.pool, self.phis, - 1. / (1. 
+ torch.exp(self.phis @ theta).view(-1))).view(-1, 1) \ - + self.s * theta.view(-1, 1) - else: - return self.s * theta.view(-1, 1) - - def construct_covariance(self, theta): - W = torch.eye(self.get_m()).double() * self.s + torch.einsum('i,ij,ik->jk', - torch.exp(self.phis @ theta).view(-1) / ( - 1 + torch.exp(self.phis @ theta)).view( - -1) ** 2, self.phis, self.phis) - return W + def construct_covariance(self, theta): + D1 = torch.diag(self.counts / (self.phis @ theta).view(-1) ** 2) + D2 = torch.diag( + (self.pool - self.counts) / (1 - self.phis @ theta).view(-1) ** 2 + ) + + W = ( + self.phis.T @ (D1 + D2) @ self.phis + + self.s * torch.eye(self.get_m()).double() + ) + return W + + def log_marginal(self, kernel, X): + func = kernel.get_kernel() + K = ( + func(self.x, self.x, **X) + + torch.eye(self.n, dtype=torch.float64) * self.s * self.s + ) + + L = torch.linalg.cholesky(K) + logdet = -0.5 * 2 * torch.sum(torch.log(torch.diag(L))) + alpha = torch.solve(self.y, K)[0] + logprob = -0.5 * torch.mm(torch.t(self.y), alpha) + logdet + logprob = -logprob + return logprob + + def construct_likelihood_ratio(self, method="full"): + # for data + phis = self.phis.numpy() + counts = self.counts.numpy() + + # for constraints + varphis = self.varphis.numpy() + + # current fit + mean_theta = self.rate.numpy() + + if method == "split": + pass + elif method == "full": + self.likelihood = ( + -counts @ phis @ mean_theta + + np.log(1 + np.exp(phis @ mean_theta)) + + self.s * 0.5 * np.sum(mean_theta) ** 2 + ) + elif method == "cv": + pass + + def fit_gp(self, threads=4): + phis = self.phis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + objective = cp.Minimize( + -cp.sum(cp.multiply(counts, phis @ theta)) + + cp.sum(cp.logistic(phis @ theta)) + + self.s * 0.5 * cp.sum_squares(theta) + ) + + # probability constraints + constraints = [] + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + self.rate = torch.from_numpy(theta.value) + return self.rate + + def link(self, x): + return 1.0 / (1.0 + torch.exp(-x)) + + def mean_set(self, S): + return self.link(self.embed_set(S) @ self.rate) + + def ucb(self, S, beta=8.0, delta=0.1): + if self.uncertainty == "laplace": + ucb = ( + self.embed_set(S) @ self.rate + + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T + ) + return self.link(ucb) + elif self.uncertainty == "martingale": + phi = self.embed_set(S) + hat_theta = self.rate.numpy() + + def constraint_value_gradient(theta, beta): + y = cp.Variable(self.get_m()) + v = theta - hat_theta + objective2 = cp.Maximize(y @ v - cp.sum(cp.abs(self.phis @ y)) - beta) + + prob = cp.Problem(objective2) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-4, + mosek.dparam.intpnt_co_tol_dfeas: 1e-4, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, + }, + ) + print(prob.status) + return prob.value, y.value + + beta = 2.0 + iters = 10 + gamma = 0.00000001 + theta = hat_theta + print(theta) + + for k in range(iters): + print("Iter:", k) + d = cp.Variable(self.get_m()) + objective = cp.Minimize(phi @ d.T) + val, nabla = constraint_value_gradient(theta, beta) 
+ constraints = [ + val + nabla.reshape(1, -1) @ d <= 0.0, + cp.sum_squares(d) <= gamma, + ] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) + theta = theta + d.value + print(theta) + + return phi @ theta + + elif self.uncertainty == "ratio": + phi = self.embed_set(S) + phis = self.phis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + + objective = cp.Maximize(phi @ theta) + v = np.log(1.0 / delta) + self.likelihood + constraints = [ + -cp.sum(cp.multiply(counts, phis @ theta)) + + cp.sum(cp.logistic(phis @ theta)) + + self.s * 0.5 * cp.sum_squares(theta) + <= v + ] + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + return self.link(phi @ theta.value) + + def lcb(self, S, beta=8.0, delta=0.1): + if self.uncertainty == "laplace": + lcb = ( + self.embed_set(S) @ self.rate + - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T + ) + return self.link(lcb) + elif self.uncertainty == "ratio": + phi = self.embed_set(S) + phis = self.phis.numpy() + + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + + objective = cp.Minimize(phi @ theta) + v = np.log(1.0 / delta) + self.likelihood + constraints = [ + -cp.sum(cp.multiply(counts, phis @ theta)) + + cp.sum(cp.logistic(phis @ theta)) + + self.s * 0.5 * cp.sum_squares(theta) + <= v + ] + + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + return self.link(phi @ theta.value) + + def nabla(self, theta): + if self.data is not None: + return ( + -torch.einsum("i,ij->j", self.counts, self.phis).view(-1, 1) + + torch.einsum( + "i,ij,i->j", + self.pool, + self.phis, + 1.0 / (1.0 + torch.exp(self.phis @ theta).view(-1)), + ).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + else: + return self.s * theta.view(-1, 1) + + def construct_covariance(self, theta): + W = torch.eye(self.get_m()).double() * self.s + torch.einsum( + "i,ij,ik->jk", + torch.exp(self.phis @ theta).view(-1) + / (1 + torch.exp(self.phis @ theta)).view(-1) ** 2, + self.phis, + self.phis, + ) + return W if __name__ == "__main__": - import matplotlib.pyplot as plt - from stpy.point_processes.binomial.binomial_process import BernoulliPointProcess - - d = 1 - gamma = 0.1 - n = 64 - m = 128 - levels = 7 - k = KernelFunction(gamma=gamma, kappa=1.) 
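Without the interval constraints, the link-function fit in LinkBernoulliRateEstimator.fit_gp() above reduces to l2-regularized logistic regression in the embedding space: cp.logistic(x) is log(1 + exp(x)), and link() maps the fitted linear predictor back to a probability. A small synthetic sketch (data, sizes and solver are stand-ins):

import cvxpy as cp
import numpy as np

rng = np.random.default_rng(1)
m, N, s = 5, 60, 1e-3
phis = rng.normal(size=(N, m))
counts = (rng.uniform(size=N) < 0.5).astype(float)   # 0/1 outcomes

theta = cp.Variable(m)
objective = cp.Minimize(-cp.sum(cp.multiply(counts, phis @ theta))
                        + cp.sum(cp.logistic(phis @ theta))
                        + 0.5 * s * cp.sum_squares(theta))
cp.Problem(objective).solve(solver=cp.SCS)

p_hat = 1.0 / (1.0 + np.exp(-(phis @ theta.value)))   # link() applied to the fit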
- - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - actions = hierarchical_structure.get_sets_level(levels) - dummy = torch.zeros(size=(1, d)).double() - - estimator = BernoulliRateEstimator(hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty='ratio') - estimator_link = LinkBernoulliRateEstimator(hierarchical_structure, m=64, kernel_object=k, s=0.001, - uncertainty="ratio") - - rate = lambda S: np.sin(np.pi * S.return_discretization(n=1) ** 2) * 0.5 - process = BernoulliPointProcess(hierarchical_structure.get_sets_level(levels), rate=rate) - - N = 100 - - data = [] - for i in range(N): - data.append(process.sample(actions[torch.randint(0, len(actions), size=(1, 1))])) - - estimator.load_data(data) - estimator_link.load_data(data) - - estimator.fit_gp() - estimator_link.fit_gp() - - # plot observations - for datapoint in data: - S, v, _, _, _ = datapoint - x = S.return_discretization(n) - if v == 1: - plt.plot(x, x * 0, 'ko') - else: - plt.plot(x, x * 0, 'ro') - - xtest = hierarchical_structure.top_node.return_discretization(64) - plt.plot(xtest, estimator.mean_rate(hierarchical_structure.top_node, 64) * actions[0].volume(), 'tab:blue') - - samples = 0 - for i in range(samples): - estimator.sample(steps=100, verbose=False) - plt.plot(xtest, estimator.sample_path(hierarchical_structure.top_node, 64) * actions[0].volume(), 'g--') - - estimator.construct_confidence() - estimator.construct_likelihood_ratio() - - estimator_link.construct_confidence() - estimator_link.construct_likelihood_ratio() - # plot function - for action in actions: - val = estimator.mean_set(action) - val_link = estimator_link.mean_set(action) - - ucb, lcb = float(estimator.ucb(action)), float(estimator.lcb(action)) - ucb_link, lcb_link = float(estimator_link.ucb(action, delta=0.5)), float(estimator_link.lcb(action, delta=0.5)) - x = action.return_discretization(64) - plt.plot(x, x * 0 + rate(action), color='tab:red') - x = x.view(-1) - - plt.plot(x, x * 0 + val, color='tab:blue', linestyle='--') - plt.plot(x, x * 0 + val_link, color='tab:pink', linestyle='--') - plt.fill_between(x, x * 0 + lcb, x * 0 + ucb, color='tab:blue', alpha=0.2) - plt.fill_between(x, x * 0 + lcb_link, x * 0 + ucb_link, color='tab:pink', alpha=0.2) - - plt.show() + import matplotlib.pyplot as plt + from stpy.point_processes.binomial.binomial_process import BernoulliPointProcess + + d = 1 + gamma = 0.1 + n = 64 + m = 128 + levels = 7 + k = KernelFunction(gamma=gamma, kappa=1.0) + + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + actions = hierarchical_structure.get_sets_level(levels) + dummy = torch.zeros(size=(1, d)).double() + + estimator = BernoulliRateEstimator( + hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty="ratio" + ) + estimator_link = LinkBernoulliRateEstimator( + hierarchical_structure, m=64, kernel_object=k, s=0.001, uncertainty="ratio" + ) + + rate = lambda S: np.sin(np.pi * S.return_discretization(n=1) ** 2) * 0.5 + process = BernoulliPointProcess( + hierarchical_structure.get_sets_level(levels), rate=rate + ) + + N = 100 + + data = [] + for i in range(N): + data.append( + process.sample(actions[torch.randint(0, len(actions), size=(1, 1))]) + ) + + estimator.load_data(data) + estimator_link.load_data(data) + + estimator.fit_gp() + estimator_link.fit_gp() + + # plot observations + for datapoint in data: + S, v, _, _, _ = datapoint + x = S.return_discretization(n) + if v == 1: + plt.plot(x, x * 0, "ko") + else: + plt.plot(x, 
x * 0, "ro") + + xtest = hierarchical_structure.top_node.return_discretization(64) + plt.plot( + xtest, + estimator.mean_rate(hierarchical_structure.top_node, 64) * actions[0].volume(), + "tab:blue", + ) + + samples = 0 + for i in range(samples): + estimator.sample(steps=100, verbose=False) + plt.plot( + xtest, + estimator.sample_path(hierarchical_structure.top_node, 64) + * actions[0].volume(), + "g--", + ) + + estimator.construct_confidence() + estimator.construct_likelihood_ratio() + + estimator_link.construct_confidence() + estimator_link.construct_likelihood_ratio() + # plot function + for action in actions: + val = estimator.mean_set(action) + val_link = estimator_link.mean_set(action) + + ucb, lcb = float(estimator.ucb(action)), float(estimator.lcb(action)) + ucb_link, lcb_link = float(estimator_link.ucb(action, delta=0.5)), float( + estimator_link.lcb(action, delta=0.5) + ) + x = action.return_discretization(64) + plt.plot(x, x * 0 + rate(action), color="tab:red") + x = x.view(-1) + + plt.plot(x, x * 0 + val, color="tab:blue", linestyle="--") + plt.plot(x, x * 0 + val_link, color="tab:pink", linestyle="--") + plt.fill_between(x, x * 0 + lcb, x * 0 + ucb, color="tab:blue", alpha=0.2) + plt.fill_between( + x, x * 0 + lcb_link, x * 0 + ucb_link, color="tab:pink", alpha=0.2 + ) + + plt.show() diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py index 20cf463..afac5c7 100644 --- a/stpy/point_processes/link_fun_rate_estimator.py +++ b/stpy/point_processes/link_fun_rate_estimator.py @@ -8,8 +8,14 @@ import matplotlib.pyplot as plt from stpy.embeddings.embedding import HermiteEmbedding import scipy.integrate as integrate -from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse -from stpy.helpers.ellipsoid_algorithms import maximize_matrix_quadratic_on_ellipse, minimize_matrix_quadratic_on_ellipse +from stpy.helpers.ellipsoid_algorithms import ( + maximize_quadratic_on_ellipse, + minimize_quadratic_on_ellipse, +) +from stpy.helpers.ellipsoid_algorithms import ( + maximize_matrix_quadratic_on_ellipse, + minimize_matrix_quadratic_on_ellipse, +) from stpy.point_processes.poisson import PoissonPointProcess from stpy.point_processes.poisson_rate_estimator import PositiveRateEstimator from stpy.borel_set import BorelSet, HierarchicalBorelSets @@ -17,451 +23,580 @@ ## implement loading data -class PermanentalProcessRateEstimator(PositiveRateEstimator): - - def __init__(self, *args, **kwargs): - super().__init__(*args,**kwargs) - - self.integration = "fixed_quad" - self.product_integrals = {} - self.varLambdas = torch.zeros(size=(len(self.basic_sets), self.get_m(),self.get_m())).double() - self.opt = 'cvxpy' - if self.feedback == "count-record" and self.estimator=="least-sq": - print ("precomputing-integrals:") - for index_set, set in enumerate(self.basic_sets): - print (index_set,"/",len(self.basic_sets)) - self.varLambdas[index_set, :] = self.product_integral(set) - self.variances[index_set] = set.volume() * self.B - - - def product_integral(self,S): - - if S in self.product_integrals.keys(): - return self.product_integrals[S] - else: - - if "product_integral" in dir(self.packing): - Psi = self.packing.product_integral(S) - self.product_integrals[S] = Psi - return Psi - - elif self.integration == "vec_quad": - - if S.d == 2: - #Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() - F = lambda x: (self.packing.embed(x).view(-1, 1) @\ - self.packing.embed(x).view(1, -1)).view(-1) - 
integrand = lambda x, y: F(torch.Tensor([x, y]).view(1, 2).double()).numpy() - - val = quadvec2(integrand,float(S.bounds[0, 0]), float(S.bounds[0, 1]), - float(S.bounds[1, 0]), float(S.bounds[1, 1]),limit = 10,epsrel = 10e-3, epsabs = 10e-3, quadrature = 'gk15') - Psi = torch.from_numpy(val).view((self.get_m(), self.get_m())) - - elif self.integration == "fixed_quad": - - if S.d ==1: - weights, nodes = S.return_legendre_discretization(n=128) - Z = self.packing.embed(nodes) - M = torch.einsum('ij,ik->ijk', Z, Z) - Psi = torch.einsum('i,ijk->jk', weights, M) - - if S.d ==2: - weights, nodes = S.return_legendre_discretization(n = 50) - Z = self.packing.embed(nodes) - M = torch.einsum('ij,ik->ijk',Z,Z) - Psi = torch.einsum('i,ijk->jk',weights,M) - - else: - Psi = torch.zeros(size = (self.get_m(),self.get_m())).double() - for i in range(self.get_m()): - for j in range(self.get_m()): - - if S.d == 1: - F_ij = lambda x: (self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[i] * - self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[ - j]).numpy() - val, status = integrate.quad(F_ij,float(S.bounds[0,0]), float(S.bounds[0,1])) - - - elif S.d == 2: - F_ij = lambda x: self.packing.embed(x).view(-1)[i] *self.packing.embed(x).view(-1)[j] - integrand = lambda x, y: F_ij(torch.Tensor([x, y]).view(1, 2).double()).numpy() - val,status = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), - lambda x: float(S.bounds[1, 1]),epsabs=1.49e-03, epsrel=1.49e-03) - else: - raise NotImplementedError("Integration above d>2 not implemented.") - - Psi[i,j] = val - print(i, j, val) - - self.product_integrals[S] = Psi - return Psi - - def get_constraints(self): - s = self.get_m() - l = np.full(s, self.b) - u = np.full(s, self.B) - Lambda = np.identity(s) - return (l,Lambda,u) - - def cov(self, inverse=False): - s = self.get_m() - - if inverse==False: - return torch.zeros(size = (s,s)).double() - else: - return torch.zeros(size=(s, s)).double(),torch.zeros(size=(s, s)).double() - - - def sample(self, verbose = False, steps = 10, stepsize = None): - - if self.data is None: - self.sampled_theta = torch.zeros(self.get_m()).double().view(-1,1) - return None - - if self.observations is not None: - observations = self.observations.double() - sumLambda = self.sumLambda.double() - nabla = lambda theta: -torch.sum(torch.diag(1. 
/(observations@theta).view(-1)) @ observations) \ - + (sumLambda.T + sumLambda) @ theta + self.s*theta.view(-1,1) - else: - sumLambda = self.sumLambda.double() - nabla = lambda theta: (sumLambda.T + sumLambda) @ theta + self.s*theta.view(-1,1) - - theta = self.rate.view(-1, 1) - - W = self.construct_covariance_matrix_laplace() - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - eta = 0.5 / (L + 1) - - for k in range(steps): - W = torch.randn(size=(self.get_m(), 1)).double() - theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = theta - return None - - def sample_value(self, S): - """ - Given a pre-sampled value evaluate certain portions of the domain S - :param S: - :return: - """ - Z = self.product_integral(S) - map = self.sampled_theta.T@ Z @self.sampled_theta - return map - - - def sample_path(self, S, n=128): - xtest = S.return_discretization(n) - return (self.packing.embed(xtest) @ self.sampled_theta)**2 - - - - - def load_data(self,data): - super().load_data(data, times = False) - self.sumLambda = torch.zeros(size = (self.get_m(),self.get_m())) - if len(data) > 1: - for sample in data: - (S,obs,dt) = sample - self.sumLambda += self.product_integral(S) * dt - - def add_data_point(self, new_data): - super().add_data_point(new_data, times = False) - (S, obs, dt) = new_data - self.sumLambda += self.product_integral(S) * dt - - def penalized_likelihood(self, threads = 4): - sumLambda = self.sumLambda.numpy() - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float(-np.sum(np.log( (observations@theta)**2 )) + np.dot(theta, sumLambda@theta) + 0.5*self.s*np.sum(theta**2)) - else: - loss = lambda theta: float(np.dot(theta, sumLambda @ theta) + 0.5*self.s * np.sum(theta ** 2)) - - theta = np.random.randn(self.get_m()) - res = minimize(loss, theta, jac=None, method='L-BFGS-B') - self.rate = torch.from_numpy(res.x) - return self.rate - - def construct_covariance_matrix_laplace(self): - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.feedback == "count-record": - if self.observations is not None: - for i in range(self.observations.size()[0]): - A = self.observations[i, :].view(-1, 1) @ self.observations[i, :].view(1, -1) - k = np.maximum(torch.dot(self.observations[i, :],self.rate.view(-1)) ** 2,self.b) - W = W + A / k - W += 2*self.sumLambda - else: - raise AssertionError("Not implemented.") - return W + torch.eye(self.get_m()).double()*self.s - - - def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): - - phi = self.packing.integral(S) - map = (phi @ self.rate) - - ucb = np.maximum((map + beta*np.sqrt(phi@self.W_inv_approx@phi.T))**2,(map - beta*np.sqrt(phi@self.W_inv_approx@phi.T))**2) - ucb = np.minimum(ucb,self.B*S.volume()*dt) - lcb = 0. 
- - return dt*map**2, dt*lcb, dt*ucb - - def mean_std_per_action(self,S,W, dt , beta): - Z = self.product_integral(S) - - ucb, _ = maximize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - lcb, _ = minimize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - - map = self.rate.T @ Z @ self.rate - - return dt * map, dt * ucb, -lcb * dt - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return (self.packing.embed(xtest) @ self.rate)**2 - - def mean_rate_latent(self,S,n = 128): - xtest = S.return_discretization(n) - return self.packing.embed(xtest) @ self.rate - - - def map_lcb_ucb_approx(self,S,n,beta = 2.0, delta = 0.01): - xtest = S.return_discretization(n) - if self.data is None: - return 0 * xtest[:, 0].view(-1, 1),self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:,0].view(-1,xtest.size()[0]) - self.fit_ellipsoid_approx() - - Phi = self.packing.embed(xtest).double() - map = Phi @ self.rate - N = Phi.size()[0] - - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :].view(-1,1) - maximum = np.maximum((map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x))**2, (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x))**2) - ucb[i,0] = np.minimum( maximum ,self.B) - lcb[i,0] = 0. - #lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2 - return map**2, lcb, ucb - - def map_lcb_ucb(self, S, n, beta = 2.0, delta = 0.01): - """ - Calculate exact confidence using laplace approximation on a whole set domain - :param S: set - :param n: discretization - :param beta: beta - :return: - """ - - xtest = S.return_discretization(n) - if self.data is None: - return self.b+0*xtest[:,0].view(-1,1),self.b+0*xtest[:,0].view(-1,1),self.B+0*xtest[:,0].view(-1,1) - - N = xtest.size()[0] - Phi = self.packing.embed(xtest) - map = (Phi @ self.rate)**2 - - if self.uncertainty == "laplace": - W = self.construct_covariance_matrix_laplace() - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :] - ucbi, _ = maximize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - lcbi, _ = minimize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - ucb[i, 0] = ucbi - lcb[i, 0] = lcbi +class PermanentalProcessRateEstimator(PositiveRateEstimator): - return map, lcb, ucb + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.integration = "fixed_quad" + self.product_integrals = {} + self.varLambdas = torch.zeros( + size=(len(self.basic_sets), self.get_m(), self.get_m()) + ).double() + self.opt = "cvxpy" + if self.feedback == "count-record" and self.estimator == "least-sq": + print("precomputing-integrals:") + for index_set, set in enumerate(self.basic_sets): + print(index_set, "/", len(self.basic_sets)) + self.varLambdas[index_set, :] = self.product_integral(set) + self.variances[index_set] = set.volume() * self.B + + def product_integral(self, S): + + if S in self.product_integrals.keys(): + return self.product_integrals[S] + else: + + if "product_integral" in dir(self.packing): + Psi = self.packing.product_integral(S) + self.product_integrals[S] = Psi + return Psi + + elif self.integration == "vec_quad": + + if S.d == 2: + # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() + F = lambda x: ( + self.packing.embed(x).view(-1, 1) + @ self.packing.embed(x).view(1, -1) + ).view(-1) + 
integrand = lambda x, y: F( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + + val = quadvec2( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + float(S.bounds[1, 0]), + float(S.bounds[1, 1]), + limit=10, + epsrel=10e-3, + epsabs=10e-3, + quadrature="gk15", + ) + Psi = torch.from_numpy(val).view((self.get_m(), self.get_m())) + + elif self.integration == "fixed_quad": + + if S.d == 1: + weights, nodes = S.return_legendre_discretization(n=128) + Z = self.packing.embed(nodes) + M = torch.einsum("ij,ik->ijk", Z, Z) + Psi = torch.einsum("i,ijk->jk", weights, M) + + if S.d == 2: + weights, nodes = S.return_legendre_discretization(n=50) + Z = self.packing.embed(nodes) + M = torch.einsum("ij,ik->ijk", Z, Z) + Psi = torch.einsum("i,ijk->jk", weights, M) + + else: + Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() + for i in range(self.get_m()): + for j in range(self.get_m()): + + if S.d == 1: + F_ij = lambda x: ( + self.packing.embed( + torch.from_numpy(np.array(x)).view(1, -1) + ).view(-1)[i] + * self.packing.embed( + torch.from_numpy(np.array(x)).view(1, -1) + ).view(-1)[j] + ).numpy() + val, status = integrate.quad( + F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1]) + ) + + elif S.d == 2: + F_ij = ( + lambda x: self.packing.embed(x).view(-1)[i] + * self.packing.embed(x).view(-1)[j] + ) + integrand = lambda x, y: F_ij( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + val, status = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + epsabs=1.49e-03, + epsrel=1.49e-03, + ) + else: + raise NotImplementedError( + "Integration above d>2 not implemented." + ) + + Psi[i, j] = val + print(i, j, val) + + self.product_integrals[S] = Psi + return Psi + + def get_constraints(self): + s = self.get_m() + l = np.full(s, self.b) + u = np.full(s, self.B) + Lambda = np.identity(s) + return (l, Lambda, u) + + def cov(self, inverse=False): + s = self.get_m() + + if inverse == False: + return torch.zeros(size=(s, s)).double() + else: + return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double() + + def sample(self, verbose=False, steps=10, stepsize=None): + + if self.data is None: + self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1) + return None + + if self.observations is not None: + observations = self.observations.double() + sumLambda = self.sumLambda.double() + nabla = ( + lambda theta: -torch.sum( + torch.diag(1.0 / (observations @ theta).view(-1)) @ observations + ) + + (sumLambda.T + sumLambda) @ theta + + self.s * theta.view(-1, 1) + ) + else: + sumLambda = self.sumLambda.double() + nabla = lambda theta: ( + sumLambda.T + sumLambda + ) @ theta + self.s * theta.view(-1, 1) + + theta = self.rate.view(-1, 1) + + W = self.construct_covariance_matrix_laplace() + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + eta = 0.5 / (L + 1) + + for k in range(steps): + W = torch.randn(size=(self.get_m(), 1)).double() + theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = theta + return None + + def sample_value(self, S): + """ + Given a pre-sampled value evaluate certain portions of the domain S + :param S: + :return: + """ + Z = self.product_integral(S) + map = self.sampled_theta.T @ Z @ self.sampled_theta + return map + + def sample_path(self, S, n=128): + xtest = S.return_discretization(n) + return 
(self.packing.embed(xtest) @ self.sampled_theta) ** 2 + + def load_data(self, data): + super().load_data(data, times=False) + self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m())) + if len(data) > 1: + for sample in data: + (S, obs, dt) = sample + self.sumLambda += self.product_integral(S) * dt + + def add_data_point(self, new_data): + super().add_data_point(new_data, times=False) + (S, obs, dt) = new_data + self.sumLambda += self.product_integral(S) * dt + + def penalized_likelihood(self, threads=4): + sumLambda = self.sumLambda.numpy() + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + -np.sum(np.log((observations @ theta) ** 2)) + + np.dot(theta, sumLambda @ theta) + + 0.5 * self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta**2) + ) + + theta = np.random.randn(self.get_m()) + res = minimize(loss, theta, jac=None, method="L-BFGS-B") + self.rate = torch.from_numpy(res.x) + return self.rate + + def construct_covariance_matrix_laplace(self): + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.feedback == "count-record": + if self.observations is not None: + for i in range(self.observations.size()[0]): + A = self.observations[i, :].view(-1, 1) @ self.observations[ + i, : + ].view(1, -1) + k = np.maximum( + torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2, + self.b, + ) + W = W + A / k + W += 2 * self.sumLambda + else: + raise AssertionError("Not implemented.") + return W + torch.eye(self.get_m()).double() * self.s + + def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0): + + phi = self.packing.integral(S) + map = phi @ self.rate + + ucb = np.maximum( + (map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2, + (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2, + ) + ucb = np.minimum(ucb, self.B * S.volume() * dt) + lcb = 0.0 + + return dt * map**2, dt * lcb, dt * ucb + + def mean_std_per_action(self, S, W, dt, beta): + Z = self.product_integral(S) + + ucb, _ = maximize_matrix_quadratic_on_ellipse( + Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + lcb, _ = minimize_matrix_quadratic_on_ellipse( + Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + + map = self.rate.T @ Z @ self.rate + + return dt * map, dt * ucb, -lcb * dt + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return (self.packing.embed(xtest) @ self.rate) ** 2 + + def mean_rate_latent(self, S, n=128): + xtest = S.return_discretization(n) + return self.packing.embed(xtest) @ self.rate + + def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): + xtest = S.return_discretization(n) + if self.data is None: + return ( + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, xtest.size()[0]), + ) + self.fit_ellipsoid_approx() + + Phi = self.packing.embed(xtest).double() + map = Phi @ self.rate + N = Phi.size()[0] + + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :].view(-1, 1) + maximum = np.maximum( + (map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2, + (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2, + ) + ucb[i, 0] = np.minimum(maximum, self.B) + lcb[i, 0] = 0.0 + # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2 + return map**2, lcb, ucb + + def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): + """ + Calculate 
exact confidence using laplace approximation on a whole set domain + :param S: set + :param n: discretization + :param beta: beta + :return: + """ + + xtest = S.return_discretization(n) + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + N = xtest.size()[0] + Phi = self.packing.embed(xtest) + map = (Phi @ self.rate) ** 2 + + if self.uncertainty == "laplace": + W = self.construct_covariance_matrix_laplace() + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :] + ucbi, _ = maximize_quadratic_on_ellipse( + x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + lcbi, _ = minimize_quadratic_on_ellipse( + x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + ucb[i, 0] = ucbi + lcb[i, 0] = lcbi + + return map, lcb, ucb class LogisticGaussProcessRateEstimator(PermanentalProcessRateEstimator): - def penalized_likelihood(self, threads=4): - logistic = lambda x: np.log(1 + np.exp(x)) - weights = self.weights.numpy() - nodes = self.nodes.numpy() - - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float(-np.sum(np.log(logistic(observations @ theta))) + np.sum( - weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - else: - loss = lambda theta: float(np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - - theta = np.random.randn(self.get_m()) - res = minimize(loss, theta, jac= None, method='L-BFGS-B',options={'maxcor': 20,'iprint':-1,'maxfun':150000,'maxls': 50}) - self.rate = torch.from_numpy(res.x) - - return self.rate - - def logistic(self, x): - return torch.log(1 + torch.exp(x)) - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return self.logistic(self.packing.embed(xtest) @ self.rate) + def penalized_likelihood(self, threads=4): + logistic = lambda x: np.log(1 + np.exp(x)) + weights = self.weights.numpy() + nodes = self.nodes.numpy() + + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + -np.sum(np.log(logistic(observations @ theta))) + + np.sum(weights * logistic(theta @ nodes.T)) + + self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta**2) + ) + + theta = np.random.randn(self.get_m()) + res = minimize( + loss, + theta, + jac=None, + method="L-BFGS-B", + options={"maxcor": 20, "iprint": -1, "maxfun": 150000, "maxls": 50}, + ) + self.rate = torch.from_numpy(res.x) + + return self.rate + + def logistic(self, x): + return torch.log(1 + torch.exp(x)) + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return self.logistic(self.packing.embed(xtest) @ self.rate) class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator): + def penalized_likelihood(self, threads=4): + weights = self.weights.numpy() + nodes = self.nodes.numpy() + + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + np.sum(observations @ theta) + + np.sum(weights * np.exp(-theta @ nodes.T)) + + self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta**2) + ) + + theta = np.zeros(self.get_m()) + res = minimize( + loss, + theta, + jac=None, + method="L-BFGS-B", + options={ + "maxcor": 20, + "iprint": -1, + "maxfun": 150000, + 
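+                # tight optimizer tolerances: the exp-link loss defined above is
+                # smooth and convex in theta, so L-BFGS-B can safely be run close
+                # to machine precision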
"maxls": 100, + "ftol": 1e-12, + "eps": 1e-12, + "gtol": 1e-8, + }, + ) + self.rate = torch.from_numpy(res.x) + + return self.rate + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return torch.exp(-self.packing.embed(xtest) @ self.rate) - def penalized_likelihood(self, threads=4): - weights = self.weights.numpy() - nodes = self.nodes.numpy() - - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float(np.sum(observations @ theta) + np.sum( - weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - else: - loss = lambda theta: float(np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - - theta = np.zeros(self.get_m()) - res = minimize(loss, theta, jac= None, method='L-BFGS-B',options={'maxcor': 20,'iprint':-1, - 'maxfun':150000,'maxls': 100, - 'ftol':1e-12,'eps':1e-12,'gtol':1e-8}) - self.rate = torch.from_numpy(res.x) - - return self.rate - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return torch.exp(-self.packing.embed(xtest) @ self.rate) if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.1 - n = 64 - B = 4. - b = 0.1 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 4 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 64 - embedding = HermiteEmbedding(m = m, d = 1, gamma = gamma) - k = KernelFunction(gamma = gamma) - - estimator5 = PositiveRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d = d) - - estimator4 = PermanentalProcessRateEstimator(process, hierarchical_structure,kernel_object=k, B=B, m=m, d = d) - #estimator = PermanentalProcessRateEstimator(process, hierarchical_structure, - # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid") - #estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B+1, m=m, d=d, embedding=embedding) - - #estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator2 = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding) - #estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator3 = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding) - - estimators = [estimator,estimator2,estimator3,estimator4,estimator5] - names = ['sigmoid','logistic','exp','square','no-link'] - bands = [True,False,False,False,True] - - - estimators = [estimator,estimator5,estimator4] - names = ['sigmoid','no-link','square'] - bands = [False,False,False] - - min_vol, max_vol = estimator.get_min_max() - dt = 10. 
/ (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - - - # no_samples = 3 - # data = [] - # samples = [] - # repeats = 2 - # - # for i in range(no_samples): - # j = np.random.randint(0, no_sets, 1) - # S = Sets[j[0]] - # for _ in range(repeats): - # sample = process.sample_discretized(S, dt) - # samples.append(sample) - # data.append((S, sample, dt)) - # - # sample_D = process.sample_discretized(D, dt) - # samples.append(sample_D) - # no_samples = repeats * no_samples + 1 - # data.append((D, sample_D, dt)) - - - data_single = [] - basic_sets = hierarchical_structure.get_sets_level(levels) - samples = [] - - for set in basic_sets: - sample = process.sample_discretized(set,dt) - data_single.append((set,sample,dt)) - samples.append(sample) - data = data_single - - # sample_D = torch.cat(samples) - # data = [(D,sample_D,dt)] - - # data2 = [] - # samples = [] - # for set in basic_sets: - # sample = process.sample_discretized(set,dt*2) - # data2.append((set,sample,dt*2)) - # samples.append(sample) - # - # sample_D_2 = torch.cat(samples) - # data = [(D, sample_D_2, dt*2)] - # - # data = data + data2 - - for estimator,name,band in zip(estimators,names,bands): - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.fit_gp() - rate_mean = estimator.mean_rate(D,n = n) - p = plt.plot(xtest, rate_mean, label='likelihood: '+name) - - if band == True: - _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) - plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, - color=p[0].get_color(), label=name) - - - - for j in range(len(samples)): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - # for action in Sets: - # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.) - # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2) - # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green") - process.visualize(D, samples=0, n=n, dt=1.) 
- plt.show() + torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.1 + n = 64 + B = 4.0 + b = 0.1 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 4 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 64 + embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) + k = KernelFunction(gamma=gamma) + + estimator5 = PositiveRateEstimator( + process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d + ) + + estimator4 = PermanentalProcessRateEstimator( + process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d + ) + # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure, + # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid") + # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator = LogGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B + 1, + m=m, + d=d, + embedding=embedding, + ) + + # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator2 = LogisticGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + ) + # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator3 = ExpGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + ) + + estimators = [estimator, estimator2, estimator3, estimator4, estimator5] + names = ["sigmoid", "logistic", "exp", "square", "no-link"] + bands = [True, False, False, False, True] + + estimators = [estimator, estimator5, estimator4] + names = ["sigmoid", "no-link", "square"] + bands = [False, False, False] + + min_vol, max_vol = estimator.get_min_max() + dt = 10.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + + # no_samples = 3 + # data = [] + # samples = [] + # repeats = 2 + # + # for i in range(no_samples): + # j = np.random.randint(0, no_sets, 1) + # S = Sets[j[0]] + # for _ in range(repeats): + # sample = process.sample_discretized(S, dt) + # samples.append(sample) + # data.append((S, sample, dt)) + # + # sample_D = process.sample_discretized(D, dt) + # samples.append(sample_D) + # no_samples = repeats * no_samples + 1 + # data.append((D, sample_D, dt)) + + data_single = [] + basic_sets = hierarchical_structure.get_sets_level(levels) + samples = [] + + for set in basic_sets: + sample = process.sample_discretized(set, dt) + data_single.append((set, sample, dt)) + samples.append(sample) + data = data_single + + # sample_D = torch.cat(samples) + # data = [(D,sample_D,dt)] + + # data2 = [] + # samples = [] + # for set in basic_sets: + # sample = process.sample_discretized(set,dt*2) + # data2.append((set,sample,dt*2)) + # samples.append(sample) + # + # sample_D_2 = torch.cat(samples) + # data = [(D, sample_D_2, dt*2)] + # + # data = data + data2 + + for estimator, name, band in zip(estimators, names, bands): + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood 
based + estimator.fit_gp() + rate_mean = estimator.mean_rate(D, n=n) + p = plt.plot(xtest, rate_mean, label="likelihood: " + name) + + if band == True: + _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.0) + plt.fill_between( + xtest.numpy().flatten(), + lcb.numpy().flatten(), + ucb.numpy().flatten(), + alpha=0.4, + color=p[0].get_color(), + label=name, + ) + + for j in range(len(samples)): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + # for action in Sets: + # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.) + # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2) + # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green") + process.visualize(D, samples=0, n=n, dt=1.0) + plt.show() diff --git a/stpy/point_processes/log_link_rate_estimator.py b/stpy/point_processes/log_link_rate_estimator.py index 006470c..54dcfef 100644 --- a/stpy/point_processes/log_link_rate_estimator.py +++ b/stpy/point_processes/log_link_rate_estimator.py @@ -3,215 +3,267 @@ import torch from scipy.optimize import minimize -from stpy.point_processes.poisson.link_fun_rate_estimator import PermanentalProcessRateEstimator +from stpy.point_processes.poisson.link_fun_rate_estimator import ( + PermanentalProcessRateEstimator, +) class LogGaussProcessRateEstimator(PermanentalProcessRateEstimator): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.discretization = 64 - - self.nodes = None - self.weights = None - - def load_data(self, data): - super().load_data(data) - - if len(data) > 1: - weights_arr = [] - nodes_arr = [] - for sample in data: - (S, obs, dt) = sample - weights, nodes = S.return_legendre_discretization(self.discretization) - nodes_arr.append(nodes) - weights_arr.append(weights * dt) - - self.nodes = self.packing.embed(torch.cat(nodes_arr)) - self.weights = torch.cat(weights_arr) - - def add_data_point(self, new_data): - super().add_data_point(new_data) - - S, obs, dt = new_data - weights, nodes = S.return_legendre_discretization(self.discretization) - - if self.nodes is None: - self.nodes = self.packing.embed(nodes) - self.weights = weights * dt - else: - self.nodes = torch.cat((self.nodes, self.packing.embed(nodes))) - self.weights = torch.cat((self.weights, weights * dt)) - - def sample(self, verbose=False, steps=100, stepsize=None): - - sigmoid_der_1 = lambda x: torch.exp(-x) / (torch.exp(-x) + 1) ** 2 - - if self.data is None: - self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1) - return None - - if self.observations is not None: - weights = self.weights - nodes = self.nodes - - nabla = lambda theta: -torch.sum( - torch.diag( - sigmoid_der_1(self.observations @ theta).view(-1) / self.sigmoid(self.observations @ theta).view( - -1)) @ self.observations, dim=0).view(-1, 1) \ - + self.B * torch.sum( - torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) @ nodes, dim=0).view(-1, - 1) + self.s * theta.view( - -1, 1) - else: - weights = self.weights - nodes = self.nodes - nabla = lambda theta: self.B * torch.sum( - torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) @ nodes, dim=0).view(-1, - 1) + self.s * theta.view( - -1, 1) - - # theta = self.rate.view(-1, 1)*np.nan - - # while torch.sum(torch.isnan(theta))>0: - - theta = self.rate.view(-1, 1) - for k in range(steps): - - W = self.construct_covariance_matrix_laplace(theta.view(-1)) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-4)) - eta = 0.5 / (L + 1) 
- print(eta) - s = torch.randn(size=(self.get_m(), 1)).double() - theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * s - - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = theta - return None - - def sample_value(self, S): - """ - Given a pre-sampled value evaluate certain portions of the domain S - :param S: - :return: - """ - weights, nodes = S.return_legendre_discretization(64) - Phi = self.packing.embed(nodes) - map_vals = torch.sum(weights * self.B * self.sigmoid(Phi @ self.sampled_theta)) - return map_vals - - def sample_path(self, S, n=128): - xtest = S.return_discretization(n) - return self.sigmoid(self.packing.embed(xtest) @ self.sampled_theta) * self.B - - def penalized_likelihood(self, threads=4): - sigmoid = lambda x: 1. / (1. + np.exp(-x)) - weights = self.weights.numpy() - nodes = self.nodes.numpy() - # times = self.times.numpy() - # times = self.times.numpy() - - if self.observations is not None: - observations = self.observations.numpy() - # loss = lambda theta: float(-np.sum(np.log(self.B * sigmoid(observations @ theta))) \ - # + self.B * np.einsum('i,i',(weights ,sigmoid(nodes @ theta))) + self.s * np.sum(theta ** 2)) - loss = lambda theta: float(-np.sum(np.log(self.B * sigmoid(observations @ theta))) \ - + self.B * np.sum( - weights * sigmoid(nodes @ theta).reshape(-1)) + 0.5 * self.s * np.sum(theta ** 2)) - - else: - loss = lambda theta: float( - +self.B * np.sum(weights * sigmoid(theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - - theta = np.zeros(self.get_m()) - res = minimize(loss, theta, jac=None, method='L-BFGS-B', options={'maxcor': 20, 'iprint': -1, - 'maxfun': 150000, 'maxls': 50, 'ftol': 1e-12, - 'eps': 1e-12, 'gtol': 1e-8}) - - self.rate = torch.from_numpy(res.x) - - return self.rate - - def construct_covariance_matrix_laplace(self, theta=None): - sigmoid_der_1 = lambda x: np.exp(-x) / (np.exp(-x) + 1) ** 2 - sigmoid_der_2 = lambda x: 2 * np.exp(-2 * x) / (np.exp(-x) + 1) ** 3 - np.exp(-x) / (np.exp(-x) + 1) ** 2 - sigmoid = lambda x: 1. / (1. 
+ np.exp(-x)) - - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.feedback == "count-record": - if self.observations is not None: - if theta is None: - input = (self.observations @ self.rate).view(-1) - else: - input = (self.observations @ theta).view(-1) - scales = (sigmoid_der_1(input) ** 2 + sigmoid_der_2(input) * sigmoid(input)) / (sigmoid(input) ** 2) - W = torch.einsum('ij,i,ik->jk', self.observations, scales, self.observations) - - if self.nodes is not None: - if theta is None: - scales = self.B * sigmoid_der_2(self.nodes @ self.rate) * self.weights - else: - scales = self.B * sigmoid_der_2(self.nodes @ theta) * self.weights - Z = torch.einsum('ij,i,ik->jk', self.nodes, scales, self.nodes) - W = W + Z - - else: - raise AssertionError("Not implemented.") - return W + torch.eye(self.get_m()).double() * self.s - - def mean_var_laplace_set(self, S, dt, beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_laplace() - self.approx_fit = True - self.W_inv_approx = torch.pinverse(self.W) - return self.mean_std_per_action(S, self.W, dt, beta) - - def mean_std_per_action(self, S, W, dt, beta): - weights, nodes = S.return_legendre_discretization(64) - Phi = self.packing.embed(nodes) - vars = torch.einsum('ij,jk,ki->i', Phi, self.W_inv_approx, Phi.T) - - vars = (vars + np.abs(vars)) / 2 - map_vals = weights * self.B * self.sigmoid(Phi @ self.rate) - lcb_vals = weights * self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars)) - ucb_vals = weights * self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars)) - - return dt * torch.sum(map_vals), dt * torch.sum(ucb_vals), torch, sum(lcb_vals) * dt - - def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): - """ - Calculate exact confidence using laplace approximation on a whole set domain - :param S: set - :param n: discretization - :param beta: beta - :return: - """ - - xtest = S.return_discretization(n) - if self.data is None: - return self.b + 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, - 0].view(-1, - 1) - - Phi = self.packing.embed(xtest) - map = self.B * self.sigmoid(Phi @ self.rate) - - if self.uncertainty == "laplace": - W = self.construct_covariance_matrix_laplace() - W_inv = torch.pinverse(W) - - vars = torch.einsum('ij,jk,ki->i', Phi, W_inv, Phi.T) - lcb = self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars)) - ucb = self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars)) - - return map, lcb, ucb - - def sigmoid(self, x): - return 1. / (1. 
+ torch.exp(-x)) - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return self.sigmoid(self.packing.embed(xtest) @ self.rate) * self.B + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.discretization = 64 + + self.nodes = None + self.weights = None + + def load_data(self, data): + super().load_data(data) + + if len(data) > 1: + weights_arr = [] + nodes_arr = [] + for sample in data: + (S, obs, dt) = sample + weights, nodes = S.return_legendre_discretization(self.discretization) + nodes_arr.append(nodes) + weights_arr.append(weights * dt) + + self.nodes = self.packing.embed(torch.cat(nodes_arr)) + self.weights = torch.cat(weights_arr) + + def add_data_point(self, new_data): + super().add_data_point(new_data) + + S, obs, dt = new_data + weights, nodes = S.return_legendre_discretization(self.discretization) + + if self.nodes is None: + self.nodes = self.packing.embed(nodes) + self.weights = weights * dt + else: + self.nodes = torch.cat((self.nodes, self.packing.embed(nodes))) + self.weights = torch.cat((self.weights, weights * dt)) + + def sample(self, verbose=False, steps=100, stepsize=None): + + sigmoid_der_1 = lambda x: torch.exp(-x) / (torch.exp(-x) + 1) ** 2 + + if self.data is None: + self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1) + return None + + if self.observations is not None: + weights = self.weights + nodes = self.nodes + + nabla = ( + lambda theta: -torch.sum( + torch.diag( + sigmoid_der_1(self.observations @ theta).view(-1) + / self.sigmoid(self.observations @ theta).view(-1) + ) + @ self.observations, + dim=0, + ).view(-1, 1) + + self.B + * torch.sum( + torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) + @ nodes, + dim=0, + ).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + else: + weights = self.weights + nodes = self.nodes + nabla = lambda theta: self.B * torch.sum( + torch.diag(weights.view(-1) * sigmoid_der_1(nodes @ theta).view(-1)) + @ nodes, + dim=0, + ).view(-1, 1) + self.s * theta.view(-1, 1) + + # theta = self.rate.view(-1, 1)*np.nan + + # while torch.sum(torch.isnan(theta))>0: + + theta = self.rate.view(-1, 1) + for k in range(steps): + + W = self.construct_covariance_matrix_laplace(theta.view(-1)) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-4 + ) + ) + eta = 0.5 / (L + 1) + print(eta) + s = torch.randn(size=(self.get_m(), 1)).double() + theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * s + + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = theta + return None + + def sample_value(self, S): + """ + Given a pre-sampled value evaluate certain portions of the domain S + :param S: + :return: + """ + weights, nodes = S.return_legendre_discretization(64) + Phi = self.packing.embed(nodes) + map_vals = torch.sum(weights * self.B * self.sigmoid(Phi @ self.sampled_theta)) + return map_vals + + def sample_path(self, S, n=128): + xtest = S.return_discretization(n) + return self.sigmoid(self.packing.embed(xtest) @ self.sampled_theta) * self.B + + def penalized_likelihood(self, threads=4): + sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x)) + weights = self.weights.numpy() + nodes = self.nodes.numpy() + # times = self.times.numpy() + # times = self.times.numpy() + + if self.observations is not None: + observations = self.observations.numpy() + # loss = lambda theta: float(-np.sum(np.log(self.B * sigmoid(observations @ theta))) \ + # + self.B * np.einsum('i,i',(weights ,sigmoid(nodes @ theta))) + 
self.s * np.sum(theta ** 2)) + loss = lambda theta: float( + -np.sum(np.log(self.B * sigmoid(observations @ theta))) + + self.B * np.sum(weights * sigmoid(nodes @ theta).reshape(-1)) + + 0.5 * self.s * np.sum(theta**2) + ) + + else: + loss = lambda theta: float( + +self.B * np.sum(weights * sigmoid(theta @ nodes.T)) + + self.s * np.sum(theta**2) + ) + + theta = np.zeros(self.get_m()) + res = minimize( + loss, + theta, + jac=None, + method="L-BFGS-B", + options={ + "maxcor": 20, + "iprint": -1, + "maxfun": 150000, + "maxls": 50, + "ftol": 1e-12, + "eps": 1e-12, + "gtol": 1e-8, + }, + ) + + self.rate = torch.from_numpy(res.x) + + return self.rate + + def construct_covariance_matrix_laplace(self, theta=None): + sigmoid_der_1 = lambda x: np.exp(-x) / (np.exp(-x) + 1) ** 2 + sigmoid_der_2 = ( + lambda x: 2 * np.exp(-2 * x) / (np.exp(-x) + 1) ** 3 + - np.exp(-x) / (np.exp(-x) + 1) ** 2 + ) + sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x)) + + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.feedback == "count-record": + if self.observations is not None: + if theta is None: + input = (self.observations @ self.rate).view(-1) + else: + input = (self.observations @ theta).view(-1) + scales = ( + sigmoid_der_1(input) ** 2 + sigmoid_der_2(input) * sigmoid(input) + ) / (sigmoid(input) ** 2) + W = torch.einsum( + "ij,i,ik->jk", self.observations, scales, self.observations + ) + + if self.nodes is not None: + if theta is None: + scales = ( + self.B * sigmoid_der_2(self.nodes @ self.rate) * self.weights + ) + else: + scales = self.B * sigmoid_der_2(self.nodes @ theta) * self.weights + Z = torch.einsum("ij,i,ik->jk", self.nodes, scales, self.nodes) + W = W + Z + + else: + raise AssertionError("Not implemented.") + return W + torch.eye(self.get_m()).double() * self.s + + def mean_var_laplace_set(self, S, dt, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_laplace() + self.approx_fit = True + self.W_inv_approx = torch.pinverse(self.W) + return self.mean_std_per_action(S, self.W, dt, beta) + + def mean_std_per_action(self, S, W, dt, beta): + weights, nodes = S.return_legendre_discretization(64) + Phi = self.packing.embed(nodes) + vars = torch.einsum("ij,jk,ki->i", Phi, self.W_inv_approx, Phi.T) + + vars = (vars + np.abs(vars)) / 2 + map_vals = weights * self.B * self.sigmoid(Phi @ self.rate) + lcb_vals = ( + weights * self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars)) + ) + ucb_vals = ( + weights * self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars)) + ) + + return ( + dt * torch.sum(map_vals), + dt * torch.sum(ucb_vals), + torch, + sum(lcb_vals) * dt, + ) + + def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): + """ + Calculate exact confidence using laplace approximation on a whole set domain + :param S: set + :param n: discretization + :param beta: beta + :return: + """ + + xtest = S.return_discretization(n) + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + Phi = self.packing.embed(xtest) + map = self.B * self.sigmoid(Phi @ self.rate) + + if self.uncertainty == "laplace": + W = self.construct_covariance_matrix_laplace() + W_inv = torch.pinverse(W) + + vars = torch.einsum("ij,jk,ki->i", Phi, W_inv, Phi.T) + lcb = self.B * self.sigmoid(Phi @ self.rate - beta * np.sqrt(vars)) + ucb = self.B * self.sigmoid(Phi @ self.rate + beta * np.sqrt(vars)) + + return map, lcb, ucb + + def sigmoid(self, x): + return 1.0 / (1.0 + 
torch.exp(-x)) + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return self.sigmoid(self.packing.embed(xtest) @ self.rate) * self.B diff --git a/stpy/point_processes/loglinear_estimator.py b/stpy/point_processes/loglinear_estimator.py index 221880a..d971b08 100644 --- a/stpy/point_processes/loglinear_estimator.py +++ b/stpy/point_processes/loglinear_estimator.py @@ -8,179 +8,207 @@ import matplotlib.pyplot as plt from stpy.embeddings.embedding import HermiteEmbedding import scipy.integrate as integrate -from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse +from stpy.helpers.ellipsoid_algorithms import ( + maximize_quadratic_on_ellipse, + minimize_quadratic_on_ellipse, +) from stpy.point_processes.poisson import PoissonPointProcess from stpy.point_processes.poisson_rate_estimator import PositiveRateEstimator from stpy.borel_set import BorelSet, HierarchicalBorelSets from stpy.kernels import KernelFunction -class LogLinearRateEstimator(PositiveRateEstimator): - - def __init__(self,*args,**kwargs): - super().__init__(*args,**kwargs) - - def least_squares_weighted(self, threads=0): - theta = cp.Variable(self.get_m()) - - mask = self.bucketized_counts.clone().numpy() > 0 - - observations = self.total_bucketized_obs[mask].clone().numpy() - phis = self.varphis[mask, :].clone().numpy() - tau = self.total_bucketized_time.clone().numpy() - - variances = self.variances.view(-1).clone().numpy() - - for i in range(variances.shape[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i]) - - selected_variances = variances[mask] - print (np.log(observations)) - print (selected_variances) - objective = cp.Minimize( - cp.sum_squares((phis @ theta) - np.log(observations)/tau[mask]) )#+ self.s * cp.norm2(theta)) - - prob = cp.Problem(objective) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=True, - mosek_params={mosek.iparam.num_threads: threads}) - - self.rate = torch.from_numpy(theta.value) - print (self.rate) - return self.rate - - def mean_var_reg_set(self, S, dt=1., beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_regression() - self.approx_fit = True - - map = 0 - lcb = 0 - ucb = 0 - for set in self.basic_sets: - if S.inside(set): - x = self.packing.integral(set).view(-1,1) - lcb = lcb +torch.exp(dt*(x@self.rate - beta*np.sqrt(x.T@self.W_inv@x) )) - ucb = ucb + torch.exp(dt*(x@self.rate + beta*np.sqrt(x.T@self.W_inv@x))) - map = map + torch.exp(dt*x@self.rate) - return map,ucb, lcb - - def fit_ellipsoid_approx(self): - self.W =self.construct_covariance_matrix_regression() - self.W_inv = torch.pinverse(self.W) - - # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): - # phi = self.packing.integral(S) * dt - # map = phi @ self.rate - # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # ucb = np.minimum(ucb, self.B * S.volume() * dt) - # - # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # lcb = np.maximum(lcb, self.b * S.volume() * dt) - # return map, lcb, ucb - - - def construct_covariance_matrix_regression(self): - - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.data is not None: - variances = self.variances - - if self.feedback == "count-record": - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - for index_o, o in enumerate(self.bucketized_obs): - n = mask[index_o] - if n > 0: - A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, 
:].view(1, -1) * tau[index_o] - W = W + A / (variances[index_o]) - - elif self.feedback == "histogram": - - for datapoint in self.data: - (S, obs, dt) = datapoint - varphi = self.packing.integral(S) * dt - variance = varphi@self.rate - variance = variance - A = varphi.view(-1, 1) @ varphi.view(1, -1) - W = W + A / variance - - return W + torch.eye(self.get_m()).double() * self.s +class LogLinearRateEstimator(PositiveRateEstimator): - def mean_set(self, S, dt=1.): - mu = 0 - for set in self.basic_sets: - if S.inside(set): - mu = mu + torch.exp(dt*self.packing.integral(set)@self.rate) - return mu + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def least_squares_weighted(self, threads=0): + theta = cp.Variable(self.get_m()) + + mask = self.bucketized_counts.clone().numpy() > 0 + + observations = self.total_bucketized_obs[mask].clone().numpy() + phis = self.varphis[mask, :].clone().numpy() + tau = self.total_bucketized_time.clone().numpy() + + variances = self.variances.view(-1).clone().numpy() + + for i in range(variances.shape[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + + selected_variances = variances[mask] + print(np.log(observations)) + print(selected_variances) + objective = cp.Minimize( + cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask]) + ) # + self.s * cp.norm2(theta)) + + prob = cp.Problem(objective) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=True, + mosek_params={mosek.iparam.num_threads: threads}, + ) + + self.rate = torch.from_numpy(theta.value) + print(self.rate) + return self.rate + + def mean_var_reg_set(self, S, dt=1.0, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_regression() + self.approx_fit = True + + map = 0 + lcb = 0 + ucb = 0 + for set in self.basic_sets: + if S.inside(set): + x = self.packing.integral(set).view(-1, 1) + lcb = lcb + torch.exp( + dt * (x @ self.rate - beta * np.sqrt(x.T @ self.W_inv @ x)) + ) + ucb = ucb + torch.exp( + dt * (x @ self.rate + beta * np.sqrt(x.T @ self.W_inv @ x)) + ) + map = map + torch.exp(dt * x @ self.rate) + return map, ucb, lcb + + def fit_ellipsoid_approx(self): + self.W = self.construct_covariance_matrix_regression() + self.W_inv = torch.pinverse(self.W) + + # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): + # phi = self.packing.integral(S) * dt + # map = phi @ self.rate + # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # ucb = np.minimum(ucb, self.B * S.volume() * dt) + # + # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # lcb = np.maximum(lcb, self.b * S.volume() * dt) + # return map, lcb, ucb + + def construct_covariance_matrix_regression(self): + + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.data is not None: + variances = self.variances + + if self.feedback == "count-record": + mask = self.bucketized_counts > 0 + tau = self.total_bucketized_time + for index_o, o in enumerate(self.bucketized_obs): + n = mask[index_o] + if n > 0: + A = ( + self.varphis[index_o, :].view(-1, 1) + @ self.varphis[index_o, :].view(1, -1) + * tau[index_o] + ) + W = W + A / (variances[index_o]) + + elif self.feedback == "histogram": + + for datapoint in self.data: + (S, obs, dt) = datapoint + varphi = self.packing.integral(S) * dt + variance = varphi @ self.rate + variance = variance + A = varphi.view(-1, 1) @ varphi.view(1, -1) + W = W + A / variance + + return W + 
torch.eye(self.get_m()).double() * self.s + + def mean_set(self, S, dt=1.0): + mu = 0 + for set in self.basic_sets: + if S.inside(set): + mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate) + return mu if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.1 - n = 64 - B = 4. - b = 0.1 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 5 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 128 - k = KernelFunction(gamma = gamma) - estimator = LogLinearRateEstimator(process, hierarchical_structure, - kernel_object=k, B=B, m=m, d=d, estimator='least-sq') - - min_vol, max_vol = estimator.get_min_max() - - dt = 1. / (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - no_samples = 0 - data = [] - samples = [] - repeats = 2 - - for i in range(no_samples): - j = np.random.randint(0, no_sets, 1) - S = Sets[j[0]] - for _ in range(repeats): - sample = process.sample_discretized(S, dt) - samples.append(sample) - data.append((S, sample, dt)) - - sample_D = process.sample_discretized(D, dt) - samples.append(sample_D) - no_samples = repeats * no_samples + 1 - data.append((D, sample_D, dt)) - - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.fit_gp() - - for set in estimator.basic_sets: - x = np.linspace(set.bounds[0,0],set.bounds[0,1],2) - val = estimator.mean_set(set) - plt.plot(x,x*0+float(val),'b-o') - vol = process.rate_volume(set) - plt.plot(x, x * 0 + float(vol), '-o',color = 'orange') - for j in range(no_samples): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - process.visualize(D, samples=0, n=n, dt=1.) 
\ No newline at end of file + torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.1 + n = 64 + B = 4.0 + b = 0.1 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 5 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 128 + k = KernelFunction(gamma=gamma) + estimator = LogLinearRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + estimator="least-sq", + ) + + min_vol, max_vol = estimator.get_min_max() + + dt = 1.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + no_samples = 0 + data = [] + samples = [] + repeats = 2 + + for i in range(no_samples): + j = np.random.randint(0, no_sets, 1) + S = Sets[j[0]] + for _ in range(repeats): + sample = process.sample_discretized(S, dt) + samples.append(sample) + data.append((S, sample, dt)) + + sample_D = process.sample_discretized(D, dt) + samples.append(sample_D) + no_samples = repeats * no_samples + 1 + data.append((D, sample_D, dt)) + + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood based + estimator.fit_gp() + + for set in estimator.basic_sets: + x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2) + val = estimator.mean_set(set) + plt.plot(x, x * 0 + float(val), "b-o") + vol = process.rate_volume(set) + plt.plot(x, x * 0 + float(vol), "-o", color="orange") + for j in range(no_samples): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + process.visualize(D, samples=0, n=n, dt=1.0) diff --git a/stpy/point_processes/mbr_positive_estimator.py b/stpy/point_processes/mbr_positive_estimator.py index ac753c5..de86c9e 100644 --- a/stpy/point_processes/mbr_positive_estimator.py +++ b/stpy/point_processes/mbr_positive_estimator.py @@ -9,357 +9,428 @@ import numpy as np import mosek -class MBRPositiveEstimator(PermanentalProcessRateEstimator): - - def __init__(self, *args, **kwargs): - super().__init__(*args,**kwargs) - - if self.feedback == "count-record": - self.varLambdas_vec = torch.zeros( size = (self.varLambdas.size()[0],self.varLambdas.size()[1]*self.varLambdas.size()[2])).double() - for i in range(self.varLambdas.size()[0]): - self.varLambdas_vec[i,:] = self.varLambdas[i,:,:].reshape(-1) - - self.approx_solver = True - - def fit_gp(self, threads=4): - if self.data is not None: - super().fit_gp(threads=threads) - else: - self.rate = None - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - emb = self.packing.embed(xtest) - mu = torch.einsum('ij,jk,ik->i',emb,self.rate,emb).view(-1,1) - return mu - def rate_value(self, x, dt=1): - emb = self.packing.embed(x)*dt - mu = torch.einsum('ij,jk,ik->i',emb,self.rate,emb).view(-1,1) - return mu - - def mean_set(self,S,dt =1.): - if self.data is not None: - emb = self.product_integral(S) * dt - mu = torch.trace(emb@self.rate).view(1,1) - else: - mu = self.b*S.volume() - return mu - - def penalized_likelihood(self, threads=4): - sumLambda = self.sumLambda.numpy() - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - if self.observations is not None: - observations = self.observations.numpy() - # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro") - objective = 
-cp.sum(cp.log(observations @ Theta @ observations.T)) + \ - cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta)) - else: - objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta)) - - # if self.get_m() == 2: - # # use Lorentz-cone special result - # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )] - # else: - # constraints = [Theta >> 0] - constraints = [] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form:mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas:1e-3, - mosek.dparam.intpnt_co_tol_dfeas:1e-3, - mosek.dparam.intpnt_co_tol_rel_gap:1e-3}) - self.rate = torch.from_numpy(Theta.value) - return self.rate - - - - def penalized_likelihood_bins(self, threads=4): - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - tau = self.total_bucketized_time[mask].clone().numpy() - varLambdas_vec = self.varLambdas_vec[mask,:].clone().numpy() - - - objective = -cp.sum(observations @ cp.log(cp.multiply(tau,varLambdas_vec @ cp.vec(Theta)) ) ) + \ - cp.sum(cp.multiply(tau,varLambdas_vec @ cp.vec(Theta))) + self.s * cp.sum_squares(cp.vec(Theta)) - - constraints = [Theta >> 0] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form:mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas:1e-3, - mosek.dparam.intpnt_co_tol_dfeas:1e-3, - mosek.dparam.intpnt_co_tol_rel_gap:1e-3}) - self.rate = torch.from_numpy(Theta.value) - return self.rate - - - def least_squares_weighted(self,threads = 4 ): +class MBRPositiveEstimator(PermanentalProcessRateEstimator): - if self.approx_fit == False: - self.bucketization() - - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - tau = self.total_bucketized_time.clone().numpy() - - # varsumLambdas - varLambdas_vec = self.varLambdas_vec[mask,:].clone().numpy() - - variances = self.variances.view(-1).clone().numpy() - - for i in range(variances.shape[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i]* self.variance_correction(variances[i] * tau[i]) - - selected_variances = variances[mask] - - - objective = cp.sum_squares( (varLambdas_vec@cp.vec(Theta) + - - observations)/np.sqrt(selected_variances) )+ self.s*cp.sum_squares(cp.vec(Theta))/2 - constraints = [Theta >> 0] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form:mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas:1e-3, - mosek.dparam.intpnt_co_tol_dfeas:1e-3, - mosek.dparam.intpnt_co_tol_rel_gap:1e-3}) - - self.rate = torch.from_numpy(Theta.value) - return self.rate - - def construct_covariance_matrix(self): - if self.estimator == "bins": - self.construct_covariance_matrix_bins() - elif self.estimator =="least-sq": - self.construct_covariance_matrix_regression() - else: - raise NotImplementedError("Covariance not implemented") - - def construct_covariance_matrix_regression(self): - varLambdas = self.varLambdas_vec.clone() - variances 
= self.variances - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - W = torch.zeros(size=(self.get_m()**2, self.get_m()**2)).double() - I = torch.eye(self.get_m() ** 2).double() - W_inv = self.s * torch.eye(self.get_m() ** 2).double() - - for index_o, o in enumerate(self.bucketized_obs): - n = mask[index_o] - if n > 0: - k = self.variance_correction(tau[index_o] * variances[index_o]) - v = tau[index_o] / (variances[index_o] * k) - - vec = varLambdas[index_o, :].view(-1, 1) - A = vec @ vec.T - W = W + A * v - denom = 1. + v*vec.T@W_inv@vec - W_inv = W_inv @ (I - v* vec@(vec.T@W_inv)/denom ) - - self.W = W + self.s * torch.eye(self.get_m() ** 2).double() - self.W_inv = W_inv - #self.W_cholesky = torch.cholesky(self.W, upper=True) - return self.W - - - def construct_covariance_matrix_bins(self): - self.construct_covariance_matrix_regression() - - def mean_var_reg_set(self,S, dt=1., beta=2., lcb_compute = False): - - if self.data is None: - return S.volume()*self.b,S.volume()*self.B,S.volume()*self.b - - if self.approx_fit == False: - self.W = self.construct_covariance_matrix() - self.approx_fit = True - - map = None - lcb = None - - if self.approx_solver == True: - ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True) - if lcb_compute == True: - lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False) - else: - ucb = self.band(S, beta=beta,dt=dt, maximization=True) - if lcb_compute == True: - lcb = self.band(S, beta=beta,dt=dt, maximization=False) - - return map, ucb, lcb - - def mean_var_bins_set(self,S, dt=1., beta=2., lcb_compute = False): - return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute = lcb_compute) - - def band(self, S, beta=2.,dt=1., maximization=True): - emb = self.product_integral(S) * dt - A = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - cost = cp.trace(A @ emb) - Z = self.W_cholesky.clone() - zero = np.zeros(self.get_m() ** 2) - constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * beta**2, Z @ (cp.vec(A) - cp.vec(self.rate.numpy())))] - constraints += [A >> 0] - - if maximization == True: - prob = cp.Problem(cp.Maximize(cost), constraints) - else: - prob = cp.Problem(cp.Minimize(cost), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: 4, - mosek.iparam.intpnt_solve_form:mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas:1e-3, - mosek.dparam.intpnt_co_tol_dfeas:1e-3, - mosek.dparam.intpnt_co_tol_rel_gap:1e-3}) - ucb = torch.trace(torch.from_numpy(A.value) @ emb) - return ucb - - def band_no_opt(self, S, beta=2.,dt=1., maximization=True): - - if self.rate is None: - if maximization == True: - return S.volume()*dt*self.B - else: - return S.volume() * dt * self.b - else: - emb = self.product_integral(S) - cost = torch.trace(self.rate @ emb) - if maximization == True: - out = cost + beta* emb.view(1,-1)@self.W_inv@emb.view(-1,1) - else: - out = np.maximum(cost - beta* emb.view(1,-1)@self.W_inv@emb.view(-1,1),0.) 
- return out*dt - - def gap(self, S, actions, w, dt, beta=2.): - """ - Estimates the gap of an action S, - :param S: - :param dt: - :return: - """ - - if self.data is None: - return (self.B-self.b)*S.volume()/w(S) - - if self.ucb_identified == False: - print("Recomputing UCB.....") - self.ucb_identified = True - self.max_ucb = -1000 - self.ucb_action = None - for action in actions: - _, ucb,__ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0)) - ucb = ucb / w(action) - if ucb > self.max_ucb: - self.max_ucb = ucb - self.ucb_action = action - map, ucb, lcb = self.mean_var_reg_set(S, dt=dt, beta=self.beta(0), lcb_compute = True) - gap = w(S) * self.max_ucb - lcb - return gap - - def information(self, S, dt, precomputed = None): - - if self.data is None: - return 1. - - if self.W is None: - self.construct_covariance_matrix() - - if self.feedback == "count-record": - varphi_UCB = self.product_integral(self.ucb_action).view(1,-1)*dt - - ind = [] - for index, set in enumerate(self.basic_sets): - if S.inside(set): - ind.append(index) - Upsilon = self.varLambdas_vec[ind, :]*dt - - I = torch.eye(Upsilon.size()[0]).double() - G = self.W_inv - self.W_inv@Upsilon.T@torch.inverse(I + Upsilon @ Upsilon.T)@Upsilon@self.W_inv - return 10e-4 + torch.logdet( varphi_UCB @self.W_inv @ varphi_UCB.T) - torch.logdet( varphi_UCB @ G @ varphi_UCB.T) - - elif self.feedback =="histogram": - raise NotImplementedError("Not implemented.") + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + if self.feedback == "count-record": + self.varLambdas_vec = torch.zeros( + size=( + self.varLambdas.size()[0], + self.varLambdas.size()[1] * self.varLambdas.size()[2], + ) + ).double() + for i in range(self.varLambdas.size()[0]): + self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1) + + self.approx_solver = True + + def fit_gp(self, threads=4): + if self.data is not None: + super().fit_gp(threads=threads) + else: + self.rate = None + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + emb = self.packing.embed(xtest) + mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1) + return mu + + def rate_value(self, x, dt=1): + emb = self.packing.embed(x) * dt + mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1) + return mu + + def mean_set(self, S, dt=1.0): + if self.data is not None: + emb = self.product_integral(S) * dt + mu = torch.trace(emb @ self.rate).view(1, 1) + else: + mu = self.b * S.volume() + return mu + + def penalized_likelihood(self, threads=4): + sumLambda = self.sumLambda.numpy() + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + if self.observations is not None: + observations = self.observations.numpy() + # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro") + objective = ( + -cp.sum(cp.log(observations @ Theta @ observations.T)) + + cp.trace(sumLambda @ Theta) + + self.s * cp.sum_squares(cp.vec(Theta)) + ) + else: + objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares( + cp.vec(Theta) + ) + + # if self.get_m() == 2: + # # use Lorentz-cone special result + # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )] + # else: + # constraints = [Theta >> 0] + constraints = [] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + 
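+                # loose interior-point tolerances (1e-3): they trade solution
+                # accuracy for noticeably faster conic solves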
mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def penalized_likelihood_bins(self, threads=4): + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + tau = self.total_bucketized_time[mask].clone().numpy() + varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() + + objective = ( + -cp.sum( + observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + ) + + cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + + self.s * cp.sum_squares(cp.vec(Theta)) + ) + + constraints = [Theta >> 0] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def least_squares_weighted(self, threads=4): + + if self.approx_fit == False: + self.bucketization() + + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + tau = self.total_bucketized_time.clone().numpy() + + # varsumLambdas + varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() + + variances = self.variances.view(-1).clone().numpy() + + for i in range(variances.shape[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + + selected_variances = variances[mask] + + objective = ( + cp.sum_squares( + (varLambdas_vec @ cp.vec(Theta) + -observations) + / np.sqrt(selected_variances) + ) + + self.s * cp.sum_squares(cp.vec(Theta)) / 2 + ) + constraints = [Theta >> 0] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def construct_covariance_matrix(self): + if self.estimator == "bins": + self.construct_covariance_matrix_bins() + elif self.estimator == "least-sq": + self.construct_covariance_matrix_regression() + else: + raise NotImplementedError("Covariance not implemented") + + def construct_covariance_matrix_regression(self): + varLambdas = self.varLambdas_vec.clone() + variances = self.variances + mask = self.bucketized_counts > 0 + tau = self.total_bucketized_time + W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double() + I = torch.eye(self.get_m() ** 2).double() + W_inv = self.s * torch.eye(self.get_m() ** 2).double() + + for index_o, o in enumerate(self.bucketized_obs): + n = mask[index_o] + if n > 0: + k = self.variance_correction(tau[index_o] * variances[index_o]) + v = tau[index_o] / (variances[index_o] * k) + + vec = varLambdas[index_o, :].view(-1, 1) + A = vec @ vec.T + W = W + A * v + denom = 1.0 + v * vec.T @ W_inv @ vec + W_inv = W_inv @ (I - v * 
vec @ (vec.T @ W_inv) / denom) + + self.W = W + self.s * torch.eye(self.get_m() ** 2).double() + self.W_inv = W_inv + # self.W_cholesky = torch.cholesky(self.W, upper=True) + return self.W + + def construct_covariance_matrix_bins(self): + self.construct_covariance_matrix_regression() + + def mean_var_reg_set(self, S, dt=1.0, beta=2.0, lcb_compute=False): + + if self.data is None: + return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b + + if self.approx_fit == False: + self.W = self.construct_covariance_matrix() + self.approx_fit = True + + map = None + lcb = None + + if self.approx_solver == True: + ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True) + if lcb_compute == True: + lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False) + else: + ucb = self.band(S, beta=beta, dt=dt, maximization=True) + if lcb_compute == True: + lcb = self.band(S, beta=beta, dt=dt, maximization=False) + + return map, ucb, lcb + + def mean_var_bins_set(self, S, dt=1.0, beta=2.0, lcb_compute=False): + return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute) + + def band(self, S, beta=2.0, dt=1.0, maximization=True): + emb = self.product_integral(S) * dt + A = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + cost = cp.trace(A @ emb) + Z = self.W_cholesky.clone() + zero = np.zeros(self.get_m() ** 2) + constraints = [ + cp.SOC( + zero.T @ cp.vec(A) + self.s * beta**2, + Z @ (cp.vec(A) - cp.vec(self.rate.numpy())), + ) + ] + constraints += [A >> 0] + + if maximization == True: + prob = cp.Problem(cp.Maximize(cost), constraints) + else: + prob = cp.Problem(cp.Minimize(cost), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: 4, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + ucb = torch.trace(torch.from_numpy(A.value) @ emb) + return ucb + + def band_no_opt(self, S, beta=2.0, dt=1.0, maximization=True): + + if self.rate is None: + if maximization == True: + return S.volume() * dt * self.B + else: + return S.volume() * dt * self.b + else: + emb = self.product_integral(S) + cost = torch.trace(self.rate @ emb) + if maximization == True: + out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1) + else: + out = np.maximum( + cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.0 + ) + return out * dt + + def gap(self, S, actions, w, dt, beta=2.0): + """ + Estimates the gap of an action S, + :param S: + :param dt: + :return: + """ + + if self.data is None: + return (self.B - self.b) * S.volume() / w(S) + + if self.ucb_identified == False: + print("Recomputing UCB.....") + self.ucb_identified = True + self.max_ucb = -1000 + self.ucb_action = None + for action in actions: + _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0)) + ucb = ucb / w(action) + if ucb > self.max_ucb: + self.max_ucb = ucb + self.ucb_action = action + map, ucb, lcb = self.mean_var_reg_set( + S, dt=dt, beta=self.beta(0), lcb_compute=True + ) + gap = w(S) * self.max_ucb - lcb + return gap + + def information(self, S, dt, precomputed=None): + + if self.data is None: + return 1.0 + + if self.W is None: + self.construct_covariance_matrix() + + if self.feedback == "count-record": + varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt + + ind = [] + for index, set in enumerate(self.basic_sets): + if S.inside(set): + 
ind.append(index) + Upsilon = self.varLambdas_vec[ind, :] * dt + + I = torch.eye(Upsilon.size()[0]).double() + G = ( + self.W_inv + - self.W_inv + @ Upsilon.T + @ torch.inverse(I + Upsilon @ Upsilon.T) + @ Upsilon + @ self.W_inv + ) + return ( + 10e-4 + + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T) + - torch.logdet(varphi_UCB @ G @ varphi_UCB.T) + ) + + elif self.feedback == "histogram": + raise NotImplementedError("Not implemented.") if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.2 - n = 64 - B = 4. - b = 0.5 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 3 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 32 - embedding = HermiteEmbedding(m = m, d = 1, gamma = gamma) - k = KernelFunction(gamma = gamma) - estimator = MBRPositiveEstimator(process, hierarchical_structure, kernel_object=k, - B=B, m=m, d=d, embedding=embedding, basis = "custom") - min_vol, max_vol = estimator.get_min_max() - - dt = 10. / (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - no_samples = 0 - data = [] - samples = [] - repeats = 2 - - for i in range(no_samples): - j = np.random.randint(0, no_sets, 1) - S = Sets[j[0]] - for _ in range(repeats): - sample = process.sample_discretized(S, dt) - samples.append(sample) - data.append((S, sample, dt)) - - sample_D = process.sample_discretized(D, dt) - samples.append(sample_D) - no_samples = repeats * no_samples + 1 - data.append((D, sample_D, dt)) - - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.penalized_likelihood() - rate_mean = estimator.mean_rate(D,n = n) - - #_, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) - - - for j in range(no_samples): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - plt.plot(xtest, rate_mean, label='likelihood - locations known') - #plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, - # color='blue', label='triangle') - process.visualize(D, samples=0, n=n, dt=1.) 
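# NOTE: MBRPositiveEstimator models the rate as the quadratic form
#     rate(x) = phi(x)^T Theta phi(x),
# with Theta constrained to be positive semidefinite in the bins / least-squares
# fits (`Theta >> 0`), so the integral of the rate over a set S with
# Psi_S = \int_S phi(x) phi(x)^T dx collapses to trace(Theta @ Psi_S); this is
# what mean_rate, mean_set and band rely on. A short numerical check of both
# identities with a toy embedding (the polynomial features and crude quadrature
# weights below are stand-ins, not the library's discretization):
import torch

torch.manual_seed(0)
m, n = 4, 64
L = torch.randn(m, m).double()
Theta = L @ L.T                                       # any PSD parameter matrix
nodes = torch.linspace(-1.0, 1.0, n).double().view(-1, 1)
weights = torch.full((n,), 2.0 / n).double()          # crude quadrature weights
Phi = torch.cat([nodes**k for k in range(m)], dim=1)  # toy polynomial embedding

rate = torch.einsum("ij,jk,ik->i", Phi, Theta, Phi)   # phi(x)^T Theta phi(x) per node
Psi = torch.einsum("i,ij,ik->jk", weights, Phi, Phi)  # approximates \int_S phi phi^T
assert torch.allclose(weights @ rate, torch.trace(Theta @ Psi))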
+ torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.2 + n = 64 + B = 4.0 + b = 0.5 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 3 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 32 + embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) + k = KernelFunction(gamma=gamma) + estimator = MBRPositiveEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + basis="custom", + ) + min_vol, max_vol = estimator.get_min_max() + + dt = 10.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + no_samples = 0 + data = [] + samples = [] + repeats = 2 + + for i in range(no_samples): + j = np.random.randint(0, no_sets, 1) + S = Sets[j[0]] + for _ in range(repeats): + sample = process.sample_discretized(S, dt) + samples.append(sample) + data.append((S, sample, dt)) + + sample_D = process.sample_discretized(D, dt) + samples.append(sample_D) + no_samples = repeats * no_samples + 1 + data.append((D, sample_D, dt)) + + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood based + estimator.penalized_likelihood() + rate_mean = estimator.mean_rate(D, n=n) + + # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) + + for j in range(no_samples): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + plt.plot(xtest, rate_mean, label="likelihood - locations known") + # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, + # color='blue', label='triangle') + process.visualize(D, samples=0, n=n, dt=1.0) diff --git a/stpy/point_processes/poisson.py b/stpy/point_processes/poisson.py index 6776f2d..12703fc 100644 --- a/stpy/point_processes/poisson.py +++ b/stpy/point_processes/poisson.py @@ -4,157 +4,194 @@ from stpy.borel_set import BorelSet +class PoissonPointProcess: + """ + parametrized by log linear model + + """ + + def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None): + self.B = B + self.d = d + self.b = b + if rate is None: + self.rate = self.rate_default + else: + self.rate = rate + + self.rate_volume_f = rate_volume + self.exact = True + + def rate_default(self, x, dt=1.0): + return ( + self.B + * torch.sum( + torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1 + ).view(-1, 1) + + self.b + ) * dt + + def rate_volume(self, S, dt=1, rate=None): + if self.rate_volume_f is None: + # integrate rate numerically over S + import scipy.integrate as integrate + + if rate is None: + rate = self.rate + else: + rate = rate + integral = 0 + if self.d == 1: + # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) + integral, _ = integrate.quad( + lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(), + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + ) + elif self.d == 2: + integrand = lambda x, y: rate( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + integral, _ = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + ) + + return integral * dt + else: + return self.rate_volume_f(S) * dt + + def sample_discretized(self, S, dt, n=50): + lam = np.maximum(float(self.rate_volume(S, dt)), 0) + count = 
np.random.poisson(lam=lam) + if count > 0: + x = S.return_discretization(n) + r = self.rate(x) * dt + r = torch.maximum(r, r * 0) + sample = torch.from_numpy( + np.random.choice( + np.arange(0, x.size()[0], 1), + size=count, + p=(r / torch.sum(r)).numpy().reshape(-1), + ) + ) + return x[sample, :] + else: + return None + + def sample_discretized_direct(self, x, val): + lam = 1000 + count = np.random.poisson(lam=np.maximum(0, lam)) + if count > 0: + val = torch.abs(val) + sample = torch.from_numpy( + np.random.choice( + np.arange(0, x.size()[0], 1), + size=count, + p=(val / torch.sum(val)).numpy().reshape(-1), + ) + ) + return x[sample, :] + else: + return None + + def sample(self, S, dt=1.0, verbose=False, rate=None): + """ + + :param S: set where it should be sampled + :return: + """ + if self.exact == True: + + return self.sample_discretized(S, dt=dt) + + else: + + lam = self.rate_volume(S, dt) + n = np.random.poisson(lam=lam) + new_sample = [] + vol = S.volume() + size = 0 + + alpha = 1.0 / lam + + while size < n: + # uniform sample g(s) = 1/vol(S) + sample = S.uniform_sample(1) + + t = self.rate(sample) / (alpha * lam) + p = np.random.uniform(0, 1) + if p < t: + new_sample.append(sample.view(1, -1)) + size = size + 1 + + if len(new_sample) > 1: + x = torch.cat(new_sample, dim=0) + else: + return None + return x + + def rate_sets(self, Sets, dt=1): + res = [] + for S in Sets: + res.append(self.rate_volume(S, dt=dt)) + return res + + def visualize(self, S, samples=2, n=10, dt=1.0, show=True): + xtest = S.return_discretization(n) + rate = self.rate(xtest) + + if self.d == 1: + plt.plot(xtest, rate, label="rate", lw=3) + for i in range(samples): + + x = self.sample(S, dt=dt) + if x is not None: + n = x.size()[0] + plt.plot(x, x * 0, "o", label="sample n=" + str(n)) + + elif self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu, label="rate") + ax.contour(cs, colors="k") + + for i in range(samples): + x = self.sample(S, dt=dt) + if x is not None: + ax.plot( + x[:, 0].detach().numpy(), + x[:, 1].detach().numpy(), + "o", + ms=10, + alpha=0.5, + label="sample", + ) + ax.grid(c="k", ls="-", alpha=0.1) + plt.colorbar(cs) + + plt.legend() + if show == True: + plt.show() -class PoissonPointProcess(): - """ - parametrized by log linear model - - """ - def __init__(self, d = 1, B = 1, b= 0.2, rate = None, rate_volume = None): - self.B = B - self.d = d - self.b = b - if rate is None: - self.rate = self.rate_default - else: - self.rate = rate - - - self.rate_volume_f = rate_volume - self.exact = True - - def rate_default(self,x, dt = 1.): - return (self.B*torch.sum(torch.exp(-(x+1))*torch.sin(2*x*np.pi)**2 ,dim =1).view(-1,1)+ self.b) *dt - - def rate_volume(self,S, dt = 1, rate = None): - if self.rate_volume_f is None: - # integrate rate numerically over S - import scipy.integrate as integrate - if rate is None: - rate = self.rate - else: - rate = rate - integral = 0 - if self.d == 1: - #integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) - integral,_ = integrate.quad(lambda x : rate(torch.Tensor([x]).view(1,1)).numpy(), float(S.bounds[0,0]), float(S.bounds[0,1]) ) - elif self.d ==2: - integrand = lambda x, y: rate(torch.Tensor([x, 
y]).view(1, 2).double()).numpy() - integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1])) - - return integral*dt - else: - return self.rate_volume_f(S)*dt - - def sample_discretized(self, S, dt, n = 50): - lam = np.maximum(float(self.rate_volume(S, dt)),0) - count = np.random.poisson(lam=lam) - if count > 0: - x = S.return_discretization(n) - r = self.rate(x)*dt - r = torch.maximum(r,r*0) - sample = torch.from_numpy(np.random.choice(np.arange(0,x.size()[0],1), size = count, p=(r/torch.sum(r)).numpy().reshape(-1) )) - return x[sample,:] - else: - return None - - def sample_discretized_direct(self, x,val): - lam = 1000 - count = np.random.poisson(lam=np.maximum(0,lam)) - if count > 0: - val = torch.abs(val) - sample = torch.from_numpy(np.random.choice(np.arange(0,x.size()[0],1), - size = count, p=(val/torch.sum(val)).numpy().reshape(-1) )) - return x[sample,:] - else: - return None - - def sample(self, S, dt = 1., verbose = False, rate = None): - """ - - :param S: set where it should be sampled - :return: - """ - if self.exact == True: - - return self.sample_discretized(S, dt = dt) - - else: - - lam = self.rate_volume(S, dt) - n = np.random.poisson(lam = lam) - new_sample = [] - vol = S.volume() - size = 0 - - alpha = 1./lam - - while size1: - x = torch.cat(new_sample, dim = 0) - else: - return None - return x - - def rate_sets(self,Sets, dt = 1): - res = [] - for S in Sets: - res.append(self.rate_volume(S,dt=dt)) - return res - - def visualize(self,S,samples = 2, n = 10, dt = 1., show = True): - xtest = S.return_discretization(n) - rate = self.rate(xtest) - - if self.d == 1: - plt.plot(xtest, rate, label='rate', lw = 3) - for i in range(samples): - - x = self.sample(S, dt= dt) - if x is not None: - n = x.size()[0] - plt.plot(x,x*0,'o', label = 'sample n='+str(n)) - - elif self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu, label = 'rate') - ax.contour(cs, colors='k') - - for i in range(samples): - x = self.sample(S, dt = dt) - if x is not None: - ax.plot(x[:, 0].detach().numpy(), x[:, 1].detach().numpy(), 'o', ms=10, alpha = 0.5, label = 'sample') - ax.grid(c='k', ls='-', alpha=0.1) - plt.colorbar(cs) - - plt.legend() - if show == True: - plt.show() - if __name__ == "__main__": - d = 2 - n = 100 - bounds = torch.Tensor([[-1,1],[-1,1]]).double() - D = BorelSet(d, bounds) - - process = PoissonPointProcess(d = d, B = 2) - process.visualize(D, samples = 10, n = n, dt = 10) - - + d = 2 + n = 100 + bounds = torch.Tensor([[-1, 1], [-1, 1]]).double() + D = BorelSet(d, bounds) + process = PoissonPointProcess(d=d, B=2) + process.visualize(D, samples=10, n=n, dt=10) diff --git a/stpy/point_processes/poisson/link_fun_rate_estimator.py b/stpy/point_processes/poisson/link_fun_rate_estimator.py index d4e50d5..ad0b3f1 100644 --- a/stpy/point_processes/poisson/link_fun_rate_estimator.py +++ b/stpy/point_processes/poisson/link_fun_rate_estimator.py @@ -7,8 +7,14 @@ from stpy.borel_set import BorelSet, HierarchicalBorelSets from stpy.embeddings.embedding import HermiteEmbedding -from stpy.helpers.ellipsoid_algorithms import maximize_matrix_quadratic_on_ellipse, 
minimize_matrix_quadratic_on_ellipse -from stpy.helpers.ellipsoid_algorithms import maximize_quadratic_on_ellipse, minimize_quadratic_on_ellipse +from stpy.helpers.ellipsoid_algorithms import ( + maximize_matrix_quadratic_on_ellipse, + minimize_matrix_quadratic_on_ellipse, +) +from stpy.helpers.ellipsoid_algorithms import ( + maximize_quadratic_on_ellipse, + minimize_quadratic_on_ellipse, +) from stpy.helpers.quadrature_helper import quadvec2 from stpy.kernels import KernelFunction from stpy.point_processes.poisson import PoissonPointProcess @@ -17,452 +23,580 @@ ## implement loading data + class PermanentalProcessRateEstimator(PoissonRateEstimator): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.integration = "fixed_quad" - self.product_integrals = {} - self.varLambdas = torch.zeros(size=(len(self.basic_sets), self.get_m(), self.get_m())).double() - self.opt = 'cvxpy' - if self.feedback == "count-record" and self.estimator == "least-sq": - print("precomputing-integrals:") - for index_set, set in enumerate(self.basic_sets): - print(index_set, "/", len(self.basic_sets)) - self.varLambdas[index_set, :] = self.product_integral(set) - self.variances[index_set] = set.volume() * self.B - - def product_integral(self, S): - - if S in self.product_integrals.keys(): - return self.product_integrals[S] - else: - - if "product_integral" in dir(self.packing): - Psi = self.packing.product_integral(S) - self.product_integrals[S] = Psi - return Psi - - elif self.integration == "vec_quad": - - if S.d == 2: - # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() - F = lambda x: (self.packing.embed(x).view(-1, 1) @ \ - self.packing.embed(x).view(1, -1)).view(-1) - integrand = lambda x, y: F(torch.Tensor([x, y]).view(1, 2).double()).numpy() - - val = quadvec2(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - float(S.bounds[1, 0]), float(S.bounds[1, 1]), limit=10, epsrel=10e-3, epsabs=10e-3, - quadrature='gk15') - Psi = torch.from_numpy(val).view((self.get_m(), self.get_m())) - - elif self.integration == "fixed_quad": - - if S.d == 1: - weights, nodes = S.return_legendre_discretization(n=128) - Z = self.packing.embed(nodes) - M = torch.einsum('ij,ik->ijk', Z, Z) - Psi = torch.einsum('i,ijk->jk', weights, M) - - if S.d == 2: - weights, nodes = S.return_legendre_discretization(n=50) - Z = self.packing.embed(nodes) - M = torch.einsum('ij,ik->ijk', Z, Z) - Psi = torch.einsum('i,ijk->jk', weights, M) - - else: - Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() - for i in range(self.get_m()): - for j in range(self.get_m()): - - if S.d == 1: - F_ij = lambda x: ( - self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[i] * - self.packing.embed(torch.from_numpy(np.array(x)).view(1, -1)).view(-1)[ - j]).numpy() - val, status = integrate.quad(F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1])) - - - elif S.d == 2: - F_ij = lambda x: self.packing.embed(x).view(-1)[i] * self.packing.embed(x).view(-1)[j] - integrand = lambda x, y: F_ij(torch.Tensor([x, y]).view(1, 2).double()).numpy() - val, status = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), - lambda x: float(S.bounds[1, 1]), epsabs=1.49e-03, - epsrel=1.49e-03) - else: - raise NotImplementedError("Integration above d>2 not implemented.") - - Psi[i, j] = val - print(i, j, val) - - self.product_integrals[S] = Psi - return Psi - - def get_constraints(self): - s = self.get_m() - l = np.full(s, self.b) - u = np.full(s, self.B) - 
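# NOTE: in the "fixed_quad" branch of product_integral above, the matrix
# Psi_S = \int_S phi(x) phi(x)^T dx is approximated by a Gauss-Legendre rule,
#     Psi_S ~= sum_i w_i phi(x_i) phi(x_i)^T,
# built from two einsum contractions. The same quantity can be formed without
# the intermediate (n, m, m) tensor; a small equivalence check with random
# stand-in data (not the library's embedding):
import torch

torch.manual_seed(0)
n_nodes, m = 128, 8
weights = torch.rand(n_nodes).double()        # quadrature weights w_i
Z = torch.randn(n_nodes, m).double()          # rows play the role of phi(x_i)

M = torch.einsum("ij,ik->ijk", Z, Z)          # per-node outer products
Psi = torch.einsum("i,ijk->jk", weights, M)   # weighted sum, as in the patch
Psi_direct = Z.T @ (weights.view(-1, 1) * Z)  # same result, lower memory
assert torch.allclose(Psi, Psi_direct)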
Lambda = np.identity(s) - return (l, Lambda, u) - - def cov(self, inverse=False): - s = self.get_m() - - if inverse == False: - return torch.zeros(size=(s, s)).double() - else: - return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double() - - def sample(self, verbose=False, steps=10, stepsize=None): - - if self.data is None: - self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1) - return None - - if self.observations is not None: - observations = self.observations.double() - sumLambda = self.sumLambda.double() - nabla = lambda theta: -torch.sum(torch.diag(1. / (observations @ theta).view(-1)) @ observations) \ - + (sumLambda.T + sumLambda) @ theta + self.s * theta.view(-1, 1) - else: - sumLambda = self.sumLambda.double() - nabla = lambda theta: (sumLambda.T + sumLambda) @ theta + self.s * theta.view(-1, 1) - - theta = self.rate.view(-1, 1) - - W = self.construct_covariance_matrix_laplace() - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - eta = 0.5 / (L + 1) - - for k in range(steps): - W = torch.randn(size=(self.get_m(), 1)).double() - theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = theta - return None - - def sample_value(self, S): - """ - Given a pre-sampled value evaluate certain portions of the domain S - :param S: - :return: - """ - Z = self.product_integral(S) - map = self.sampled_theta.T @ Z @ self.sampled_theta - return map - - def sample_path(self, S, n=128): - xtest = S.return_discretization(n) - return (self.packing.embed(xtest) @ self.sampled_theta) ** 2 - - def load_data(self, data): - super().load_data(data, times=False) - self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m())) - if len(data) > 1: - for sample in data: - (S, obs, dt) = sample - self.sumLambda += self.product_integral(S) * dt - - def add_data_point(self, new_data): - super().add_data_point(new_data, times=False) - (S, obs, dt) = new_data - self.sumLambda += self.product_integral(S) * dt - - def penalized_likelihood(self, threads=4): - sumLambda = self.sumLambda.numpy() - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float( - -np.sum(np.log((observations @ theta) ** 2)) + np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum( - theta ** 2)) - else: - loss = lambda theta: float(np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta ** 2)) - - theta = np.random.randn(self.get_m()) - res = minimize(loss, theta, jac=None, method='L-BFGS-B') - self.rate = torch.from_numpy(res.x) - return self.rate - - def construct_covariance_matrix_laplace(self): - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.feedback == "count-record": - if self.observations is not None: - for i in range(self.observations.size()[0]): - A = self.observations[i, :].view(-1, 1) @ self.observations[i, :].view(1, -1) - k = np.maximum(torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2, self.b) - W = W + A / k - W += 2 * self.sumLambda - else: - raise AssertionError("Not implemented.") - return W + torch.eye(self.get_m()).double() * self.s - - def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): - - phi = self.packing.integral(S) - map = (phi @ self.rate) - - ucb = np.maximum((map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2, - (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2) - ucb = np.minimum(ucb, self.B * S.volume() * dt) - lcb = 0. 
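# NOTE: PermanentalProcessRateEstimator.sample above runs an unadjusted
# Langevin (ULA) iteration on the penalized negative log-likelihood U(theta),
#     theta_{k+1} = theta_k - eta * grad U(theta_k) + sqrt(2 * eta) * xi_k,
# with the step size eta tied to the largest eigenvalue of the Laplace
# covariance. A toy sketch of the same iteration on a Gaussian target
# U(theta) = 0.5 * theta^T A theta (illustrative only; not the estimator's
# actual objective):
import numpy as np

rng = np.random.default_rng(0)
A = np.array([[2.0, 0.5], [0.5, 1.0]])
grad_U = lambda theta: A @ theta
eta = 0.5 / (np.linalg.eigvalsh(A).max() + 1.0)

theta = np.zeros(2)
samples = []
for _ in range(5000):
    theta = theta - eta * grad_U(theta) + np.sqrt(2.0 * eta) * rng.standard_normal(2)
    samples.append(theta)
# the empirical covariance approaches inv(A) up to O(eta) discretization bias
print(np.cov(np.array(samples).T))
print(np.linalg.inv(A))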
- - return dt * map ** 2, dt * lcb, dt * ucb - - def mean_std_per_action(self, S, W, dt, beta): - Z = self.product_integral(S) - - ucb, _ = maximize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - lcb, _ = minimize_matrix_quadratic_on_ellipse(Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - - map = self.rate.T @ Z @ self.rate - - return dt * map, dt * ucb, -lcb * dt - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return (self.packing.embed(xtest) @ self.rate) ** 2 - - def mean_rate_latent(self, S, n=128): - xtest = S.return_discretization(n) - return self.packing.embed(xtest) @ self.rate - - def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): - xtest = S.return_discretization(n) - if self.data is None: - return 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, 0].view(-1, - xtest.size()[ - 0]) - self.fit_ellipsoid_approx() - - Phi = self.packing.embed(xtest).double() - map = Phi @ self.rate - N = Phi.size()[0] - - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :].view(-1, 1) - maximum = np.maximum((map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2, - (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2) - ucb[i, 0] = np.minimum(maximum, self.B) - lcb[i, 0] = 0. - # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2 - return map ** 2, lcb, ucb - - def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): - """ - Calculate exact confidence using laplace approximation on a whole set domain - :param S: set - :param n: discretization - :param beta: beta - :return: - """ - - xtest = S.return_discretization(n) - if self.data is None: - return self.b + 0 * xtest[:, 0].view(-1, 1), self.b + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, - 0].view(-1, - 1) - - N = xtest.size()[0] - Phi = self.packing.embed(xtest) - map = (Phi @ self.rate) ** 2 - - if self.uncertainty == "laplace": - W = self.construct_covariance_matrix_laplace() - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :] - ucbi, _ = maximize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - lcbi, _ = minimize_quadratic_on_ellipse(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta) - ucb[i, 0] = ucbi - lcb[i, 0] = lcbi - - return map, lcb, ucb + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.integration = "fixed_quad" + self.product_integrals = {} + self.varLambdas = torch.zeros( + size=(len(self.basic_sets), self.get_m(), self.get_m()) + ).double() + self.opt = "cvxpy" + if self.feedback == "count-record" and self.estimator == "least-sq": + print("precomputing-integrals:") + for index_set, set in enumerate(self.basic_sets): + print(index_set, "/", len(self.basic_sets)) + self.varLambdas[index_set, :] = self.product_integral(set) + self.variances[index_set] = set.volume() * self.B + + def product_integral(self, S): + + if S in self.product_integrals.keys(): + return self.product_integrals[S] + else: + + if "product_integral" in dir(self.packing): + Psi = self.packing.product_integral(S) + self.product_integrals[S] = Psi + return Psi + + elif self.integration == "vec_quad": + + if S.d == 2: + # Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() + F = lambda x: ( + self.packing.embed(x).view(-1, 1) + @ self.packing.embed(x).view(1, -1) + ).view(-1) + integrand = 
lambda x, y: F( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + + val = quadvec2( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + float(S.bounds[1, 0]), + float(S.bounds[1, 1]), + limit=10, + epsrel=10e-3, + epsabs=10e-3, + quadrature="gk15", + ) + Psi = torch.from_numpy(val).view((self.get_m(), self.get_m())) + + elif self.integration == "fixed_quad": + + if S.d == 1: + weights, nodes = S.return_legendre_discretization(n=128) + Z = self.packing.embed(nodes) + M = torch.einsum("ij,ik->ijk", Z, Z) + Psi = torch.einsum("i,ijk->jk", weights, M) + + if S.d == 2: + weights, nodes = S.return_legendre_discretization(n=50) + Z = self.packing.embed(nodes) + M = torch.einsum("ij,ik->ijk", Z, Z) + Psi = torch.einsum("i,ijk->jk", weights, M) + + else: + Psi = torch.zeros(size=(self.get_m(), self.get_m())).double() + for i in range(self.get_m()): + for j in range(self.get_m()): + + if S.d == 1: + F_ij = lambda x: ( + self.packing.embed( + torch.from_numpy(np.array(x)).view(1, -1) + ).view(-1)[i] + * self.packing.embed( + torch.from_numpy(np.array(x)).view(1, -1) + ).view(-1)[j] + ).numpy() + val, status = integrate.quad( + F_ij, float(S.bounds[0, 0]), float(S.bounds[0, 1]) + ) + + elif S.d == 2: + F_ij = ( + lambda x: self.packing.embed(x).view(-1)[i] + * self.packing.embed(x).view(-1)[j] + ) + integrand = lambda x, y: F_ij( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + val, status = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + epsabs=1.49e-03, + epsrel=1.49e-03, + ) + else: + raise NotImplementedError( + "Integration above d>2 not implemented." + ) + + Psi[i, j] = val + print(i, j, val) + + self.product_integrals[S] = Psi + return Psi + + def get_constraints(self): + s = self.get_m() + l = np.full(s, self.b) + u = np.full(s, self.B) + Lambda = np.identity(s) + return (l, Lambda, u) + + def cov(self, inverse=False): + s = self.get_m() + + if inverse == False: + return torch.zeros(size=(s, s)).double() + else: + return torch.zeros(size=(s, s)).double(), torch.zeros(size=(s, s)).double() + + def sample(self, verbose=False, steps=10, stepsize=None): + + if self.data is None: + self.sampled_theta = torch.zeros(self.get_m()).double().view(-1, 1) + return None + + if self.observations is not None: + observations = self.observations.double() + sumLambda = self.sumLambda.double() + nabla = ( + lambda theta: -torch.sum( + torch.diag(1.0 / (observations @ theta).view(-1)) @ observations + ) + + (sumLambda.T + sumLambda) @ theta + + self.s * theta.view(-1, 1) + ) + else: + sumLambda = self.sumLambda.double() + nabla = lambda theta: ( + sumLambda.T + sumLambda + ) @ theta + self.s * theta.view(-1, 1) + + theta = self.rate.view(-1, 1) + + W = self.construct_covariance_matrix_laplace() + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + eta = 0.5 / (L + 1) + + for k in range(steps): + W = torch.randn(size=(self.get_m(), 1)).double() + theta = theta - eta * nabla(theta) + np.sqrt(2 * eta) * W + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = theta + return None + + def sample_value(self, S): + """ + Given a pre-sampled value evaluate certain portions of the domain S + :param S: + :return: + """ + Z = self.product_integral(S) + map = self.sampled_theta.T @ Z @ self.sampled_theta + return map + + def sample_path(self, S, n=128): + xtest = S.return_discretization(n) + return 
(self.packing.embed(xtest) @ self.sampled_theta) ** 2 + + def load_data(self, data): + super().load_data(data, times=False) + self.sumLambda = torch.zeros(size=(self.get_m(), self.get_m())) + if len(data) > 1: + for sample in data: + (S, obs, dt) = sample + self.sumLambda += self.product_integral(S) * dt + + def add_data_point(self, new_data): + super().add_data_point(new_data, times=False) + (S, obs, dt) = new_data + self.sumLambda += self.product_integral(S) * dt + + def penalized_likelihood(self, threads=4): + sumLambda = self.sumLambda.numpy() + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + -np.sum(np.log((observations @ theta) ** 2)) + + np.dot(theta, sumLambda @ theta) + + 0.5 * self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.dot(theta, sumLambda @ theta) + 0.5 * self.s * np.sum(theta**2) + ) + + theta = np.random.randn(self.get_m()) + res = minimize(loss, theta, jac=None, method="L-BFGS-B") + self.rate = torch.from_numpy(res.x) + return self.rate + + def construct_covariance_matrix_laplace(self): + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.feedback == "count-record": + if self.observations is not None: + for i in range(self.observations.size()[0]): + A = self.observations[i, :].view(-1, 1) @ self.observations[ + i, : + ].view(1, -1) + k = np.maximum( + torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2, + self.b, + ) + W = W + A / k + W += 2 * self.sumLambda + else: + raise AssertionError("Not implemented.") + return W + torch.eye(self.get_m()).double() * self.s + + def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0): + + phi = self.packing.integral(S) + map = phi @ self.rate + + ucb = np.maximum( + (map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2, + (map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T)) ** 2, + ) + ucb = np.minimum(ucb, self.B * S.volume() * dt) + lcb = 0.0 + + return dt * map**2, dt * lcb, dt * ucb + + def mean_std_per_action(self, S, W, dt, beta): + Z = self.product_integral(S) + + ucb, _ = maximize_matrix_quadratic_on_ellipse( + Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + lcb, _ = minimize_matrix_quadratic_on_ellipse( + Z.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + + map = self.rate.T @ Z @ self.rate + + return dt * map, dt * ucb, -lcb * dt + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return (self.packing.embed(xtest) @ self.rate) ** 2 + + def mean_rate_latent(self, S, n=128): + xtest = S.return_discretization(n) + return self.packing.embed(xtest) @ self.rate + + def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): + xtest = S.return_discretization(n) + if self.data is None: + return ( + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, xtest.size()[0]), + ) + self.fit_ellipsoid_approx() + + Phi = self.packing.embed(xtest).double() + map = Phi @ self.rate + N = Phi.size()[0] + + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :].view(-1, 1) + maximum = np.maximum( + (map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2, + (map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x)) ** 2, + ) + ucb[i, 0] = np.minimum(maximum, self.B) + lcb[i, 0] = 0.0 + # lcb[i, 0] = map[i] - np.sqrt(beta) * np.sqrt(x.T @ self.W_inv_approx @ x) ** 2 + return map**2, lcb, ucb + + def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): + """ + Calculate 
exact confidence using laplace approximation on a whole set domain + :param S: set + :param n: discretization + :param beta: beta + :return: + """ + + xtest = S.return_discretization(n) + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + N = xtest.size()[0] + Phi = self.packing.embed(xtest) + map = (Phi @ self.rate) ** 2 + + if self.uncertainty == "laplace": + W = self.construct_covariance_matrix_laplace() + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :] + ucbi, _ = maximize_quadratic_on_ellipse( + x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + lcbi, _ = minimize_quadratic_on_ellipse( + x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta + ) + ucb[i, 0] = ucbi + lcb[i, 0] = lcbi + + return map, lcb, ucb class LogisticGaussProcessRateEstimator(PermanentalProcessRateEstimator): - def penalized_likelihood(self, threads=4): - logistic = lambda x: np.log(1 + np.exp(x)) - weights = self.weights.numpy() - nodes = self.nodes.numpy() - - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float(-np.sum(np.log(logistic(observations @ theta))) + np.sum( - weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - else: - loss = lambda theta: float(np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - - theta = np.random.randn(self.get_m()) - res = minimize(loss, theta, jac=None, method='L-BFGS-B', - options={'maxcor': 20, 'iprint': -1, 'maxfun': 150000, 'maxls': 50}) - self.rate = torch.from_numpy(res.x) - - return self.rate - - def logistic(self, x): - return torch.log(1 + torch.exp(x)) - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return self.logistic(self.packing.embed(xtest) @ self.rate) + def penalized_likelihood(self, threads=4): + logistic = lambda x: np.log(1 + np.exp(x)) + weights = self.weights.numpy() + nodes = self.nodes.numpy() + + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + -np.sum(np.log(logistic(observations @ theta))) + + np.sum(weights * logistic(theta @ nodes.T)) + + self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.sum(weights * logistic(theta @ nodes.T)) + self.s * np.sum(theta**2) + ) + + theta = np.random.randn(self.get_m()) + res = minimize( + loss, + theta, + jac=None, + method="L-BFGS-B", + options={"maxcor": 20, "iprint": -1, "maxfun": 150000, "maxls": 50}, + ) + self.rate = torch.from_numpy(res.x) + + return self.rate + + def logistic(self, x): + return torch.log(1 + torch.exp(x)) + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return self.logistic(self.packing.embed(xtest) @ self.rate) class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator): - def penalized_likelihood(self, threads=4): - weights = self.weights.numpy() - nodes = self.nodes.numpy() - - if self.observations is not None: - observations = self.observations.numpy() - loss = lambda theta: float(np.sum(observations @ theta) + np.sum( - weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - else: - loss = lambda theta: float(np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta ** 2)) - - theta = np.zeros(self.get_m()) - res = minimize(loss, theta, jac=None, method='L-BFGS-B', options={'maxcor': 20, 'iprint': -1, - 'maxfun': 150000, 'maxls': 100, - 
'ftol': 1e-12, 'eps': 1e-12, 'gtol': 1e-8}) - self.rate = torch.from_numpy(res.x) - - return self.rate - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - return torch.exp(-self.packing.embed(xtest) @ self.rate) + def penalized_likelihood(self, threads=4): + weights = self.weights.numpy() + nodes = self.nodes.numpy() + + if self.observations is not None: + observations = self.observations.numpy() + loss = lambda theta: float( + np.sum(observations @ theta) + + np.sum(weights * np.exp(-theta @ nodes.T)) + + self.s * np.sum(theta**2) + ) + else: + loss = lambda theta: float( + np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta**2) + ) + + theta = np.zeros(self.get_m()) + res = minimize( + loss, + theta, + jac=None, + method="L-BFGS-B", + options={ + "maxcor": 20, + "iprint": -1, + "maxfun": 150000, + "maxls": 100, + "ftol": 1e-12, + "eps": 1e-12, + "gtol": 1e-8, + }, + ) + self.rate = torch.from_numpy(res.x) + + return self.rate + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + return torch.exp(-self.packing.embed(xtest) @ self.rate) if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.1 - n = 64 - B = 4. - b = 0.1 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 4 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 64 - embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) - k = KernelFunction(gamma=gamma) - - estimator5 = PoissonRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d) - - estimator4 = PermanentalProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d) - # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure, - # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid") - # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B + 1, m=m, d=d, - embedding=embedding) - - # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator2 = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, - embedding=embedding) - # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") - estimator3 = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, - embedding=embedding) - - estimators = [estimator, estimator2, estimator3, estimator4, estimator5] - names = ['sigmoid', 'logistic', 'exp', 'square', 'no-link'] - bands = [True, False, False, False, True] - - estimators = [estimator, estimator5, estimator4] - names = ['sigmoid', 'no-link', 'square'] - bands = [False, False, False] - - min_vol, max_vol = estimator.get_min_max() - dt = 10. 
/ (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - - # no_samples = 3 - # data = [] - # samples = [] - # repeats = 2 - # - # for i in range(no_samples): - # j = np.random.randint(0, no_sets, 1) - # S = Sets[j[0]] - # for _ in range(repeats): - # sample = process.sample_discretized(S, dt) - # samples.append(sample) - # data.append((S, sample, dt)) - # - # sample_D = process.sample_discretized(D, dt) - # samples.append(sample_D) - # no_samples = repeats * no_samples + 1 - # data.append((D, sample_D, dt)) - - data_single = [] - basic_sets = hierarchical_structure.get_sets_level(levels) - samples = [] - - for set in basic_sets: - sample = process.sample_discretized(set, dt) - data_single.append((set, sample, dt)) - samples.append(sample) - data = data_single - - # sample_D = torch.cat(samples) - # data = [(D,sample_D,dt)] - - # data2 = [] - # samples = [] - # for set in basic_sets: - # sample = process.sample_discretized(set,dt*2) - # data2.append((set,sample,dt*2)) - # samples.append(sample) - # - # sample_D_2 = torch.cat(samples) - # data = [(D, sample_D_2, dt*2)] - # - # data = data + data2 - - for estimator, name, band in zip(estimators, names, bands): - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.fit_gp() - rate_mean = estimator.mean_rate(D, n=n) - p = plt.plot(xtest, rate_mean, label='likelihood: ' + name) - - if band == True: - _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) - plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, - color=p[0].get_color(), label=name) - - for j in range(len(samples)): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - # for action in Sets: - # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.) - # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2) - # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green") - process.visualize(D, samples=0, n=n, dt=1.) 
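# NOTE: the estimators in this file differ mainly in the link g applied to the
# latent function f(x) = phi(x)^T theta, each keeping the rate non-negative:
#     PermanentalProcessRateEstimator    g(f) = f**2            (squared link)
#     LogisticGaussProcessRateEstimator  g(f) = log(1 + exp(f)) (softplus,
#                                        called "logistic" in this code)
#     ExpGaussProcessRateEstimator       g(f) = exp(-f)
# A short side-by-side illustration of the three links (plain numpy, not tied
# to the estimators themselves):
import numpy as np

f = np.linspace(-3.0, 3.0, 7)
links = {
    "square": f**2,
    "softplus": np.log1p(np.exp(f)),
    "exp(-f)": np.exp(-f),
}
for name, rate in links.items():
    assert (rate >= 0).all()
    print(name, np.round(rate, 3))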
- plt.show() + torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.1 + n = 64 + B = 4.0 + b = 0.1 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 4 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 64 + embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) + k = KernelFunction(gamma=gamma) + + estimator5 = PoissonRateEstimator( + process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d + ) + + estimator4 = PermanentalProcessRateEstimator( + process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d + ) + # estimator = PermanentalProcessRateEstimator(process, hierarchical_structure, + # kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom", approx="ellipsoid") + # estimator = LogGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator = LogGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B + 1, + m=m, + d=d, + embedding=embedding, + ) + + # estimator = LogisticGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator2 = LogisticGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + ) + # estimator = ExpGaussProcessRateEstimator(process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d, embedding=embedding, basis = "custom") + estimator3 = ExpGaussProcessRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + ) + + estimators = [estimator, estimator2, estimator3, estimator4, estimator5] + names = ["sigmoid", "logistic", "exp", "square", "no-link"] + bands = [True, False, False, False, True] + + estimators = [estimator, estimator5, estimator4] + names = ["sigmoid", "no-link", "square"] + bands = [False, False, False] + + min_vol, max_vol = estimator.get_min_max() + dt = 10.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + + # no_samples = 3 + # data = [] + # samples = [] + # repeats = 2 + # + # for i in range(no_samples): + # j = np.random.randint(0, no_sets, 1) + # S = Sets[j[0]] + # for _ in range(repeats): + # sample = process.sample_discretized(S, dt) + # samples.append(sample) + # data.append((S, sample, dt)) + # + # sample_D = process.sample_discretized(D, dt) + # samples.append(sample_D) + # no_samples = repeats * no_samples + 1 + # data.append((D, sample_D, dt)) + + data_single = [] + basic_sets = hierarchical_structure.get_sets_level(levels) + samples = [] + + for set in basic_sets: + sample = process.sample_discretized(set, dt) + data_single.append((set, sample, dt)) + samples.append(sample) + data = data_single + + # sample_D = torch.cat(samples) + # data = [(D,sample_D,dt)] + + # data2 = [] + # samples = [] + # for set in basic_sets: + # sample = process.sample_discretized(set,dt*2) + # data2.append((set,sample,dt*2)) + # samples.append(sample) + # + # sample_D_2 = torch.cat(samples) + # data = [(D, sample_D_2, dt*2)] + # + # data = data + data2 + + for estimator, name, band in zip(estimators, names, bands): + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood 
based + estimator.fit_gp() + rate_mean = estimator.mean_rate(D, n=n) + p = plt.plot(xtest, rate_mean, label="likelihood: " + name) + + if band == True: + _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.0) + plt.fill_between( + xtest.numpy().flatten(), + lcb.numpy().flatten(), + ucb.numpy().flatten(), + alpha=0.4, + color=p[0].get_color(), + label=name, + ) + + for j in range(len(samples)): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + # for action in Sets: + # map, lcb, ucb = estimator.map_lcb_ucb_approx_action(action,beta=2.) + # x = np.linspace(action.bounds[0,0],action.bounds[0,1],2) + # plt.plot(x,x*0+float(ucb/action.volume()),'-o', color = "green") + process.visualize(D, samples=0, n=n, dt=1.0) + plt.show() diff --git a/stpy/point_processes/poisson/loglinear_estimator.py b/stpy/point_processes/poisson/loglinear_estimator.py index 4956fb4..0e54199 100644 --- a/stpy/point_processes/poisson/loglinear_estimator.py +++ b/stpy/point_processes/poisson/loglinear_estimator.py @@ -12,169 +12,195 @@ class LogLinearRateEstimator(PoissonRateEstimator): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def least_squares_weighted(self, threads=0): - theta = cp.Variable(self.get_m()) - - mask = self.bucketized_counts.clone().numpy() > 0 - - observations = self.total_bucketized_obs[mask].clone().numpy() - phis = self.varphis[mask, :].clone().numpy() - tau = self.total_bucketized_time.clone().numpy() - - variances = self.variances.view(-1).clone().numpy() - - for i in range(variances.shape[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i]) - - selected_variances = variances[mask] - print(np.log(observations)) - print(selected_variances) - objective = cp.Minimize( - cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask])) # + self.s * cp.norm2(theta)) - - prob = cp.Problem(objective) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=True, - mosek_params={mosek.iparam.num_threads: threads}) - - self.rate = torch.from_numpy(theta.value) - print(self.rate) - return self.rate - - def mean_var_reg_set(self, S, dt=1., beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_regression() - self.approx_fit = True - - map = 0 - lcb = 0 - ucb = 0 - for set in self.basic_sets: - if S.inside(set): - x = self.packing.integral(set).view(-1, 1) - lcb = lcb + torch.exp(dt * (x @ self.rate - beta * np.sqrt(x.T @ self.W_inv @ x))) - ucb = ucb + torch.exp(dt * (x @ self.rate + beta * np.sqrt(x.T @ self.W_inv @ x))) - map = map + torch.exp(dt * x @ self.rate) - return map, ucb, lcb - - def fit_ellipsoid_approx(self): - self.W = self.construct_covariance_matrix_regression() - self.W_inv = torch.pinverse(self.W) - - # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): - # phi = self.packing.integral(S) * dt - # map = phi @ self.rate - # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # ucb = np.minimum(ucb, self.B * S.volume() * dt) - # - # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # lcb = np.maximum(lcb, self.b * S.volume() * dt) - # return map, lcb, ucb - - def construct_covariance_matrix_regression(self): - - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.data is not None: - variances = self.variances - - if self.feedback == "count-record": - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - for index_o, o in enumerate(self.bucketized_obs): - n = mask[index_o] 
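# NOTE: LogLinearRateEstimator treats the integrated rate of each basic set S
# as log-linear in the parameters: least_squares_weighted above fits
# phi_S^T theta to log(N_S) / tau_S (counts over observation time), and
# mean_set predicts exp(dt * phi_S^T theta). A stripped-down sketch of that
# regression step with stand-in features and plain (unweighted) least squares,
# for orientation only:
import numpy as np

rng = np.random.default_rng(1)
n_sets, m = 16, 4
Phi = rng.normal(size=(n_sets, m))      # rows: integrated features phi_S per basic set
tau = 5.0                               # observation time per set
counts = rng.poisson(3.0, size=n_sets)  # stand-in observed counts N_S

mask = counts > 0                       # the log of a zero count is undefined
theta_hat, *_ = np.linalg.lstsq(Phi[mask], np.log(counts[mask]) / tau, rcond=None)
set_rates = np.exp(Phi @ theta_hat)     # mean_set-style per-set predictions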
- if n > 0: - A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, :].view(1, -1) * tau[index_o] - W = W + A / (variances[index_o]) - - elif self.feedback == "histogram": - - for datapoint in self.data: - (S, obs, dt) = datapoint - varphi = self.packing.integral(S) * dt - variance = varphi @ self.rate - variance = variance - A = varphi.view(-1, 1) @ varphi.view(1, -1) - W = W + A / variance - - return W + torch.eye(self.get_m()).double() * self.s - - def mean_set(self, S, dt=1.): - mu = 0 - for set in self.basic_sets: - if S.inside(set): - mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate) - return mu + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def least_squares_weighted(self, threads=0): + theta = cp.Variable(self.get_m()) + + mask = self.bucketized_counts.clone().numpy() > 0 + + observations = self.total_bucketized_obs[mask].clone().numpy() + phis = self.varphis[mask, :].clone().numpy() + tau = self.total_bucketized_time.clone().numpy() + + variances = self.variances.view(-1).clone().numpy() + + for i in range(variances.shape[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + + selected_variances = variances[mask] + print(np.log(observations)) + print(selected_variances) + objective = cp.Minimize( + cp.sum_squares((phis @ theta) - np.log(observations) / tau[mask]) + ) # + self.s * cp.norm2(theta)) + + prob = cp.Problem(objective) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=True, + mosek_params={mosek.iparam.num_threads: threads}, + ) + + self.rate = torch.from_numpy(theta.value) + print(self.rate) + return self.rate + + def mean_var_reg_set(self, S, dt=1.0, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_regression() + self.approx_fit = True + + map = 0 + lcb = 0 + ucb = 0 + for set in self.basic_sets: + if S.inside(set): + x = self.packing.integral(set).view(-1, 1) + lcb = lcb + torch.exp( + dt * (x @ self.rate - beta * np.sqrt(x.T @ self.W_inv @ x)) + ) + ucb = ucb + torch.exp( + dt * (x @ self.rate + beta * np.sqrt(x.T @ self.W_inv @ x)) + ) + map = map + torch.exp(dt * x @ self.rate) + return map, ucb, lcb + + def fit_ellipsoid_approx(self): + self.W = self.construct_covariance_matrix_regression() + self.W_inv = torch.pinverse(self.W) + + # def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): + # phi = self.packing.integral(S) * dt + # map = phi @ self.rate + # ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # ucb = np.minimum(ucb, self.B * S.volume() * dt) + # + # lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # lcb = np.maximum(lcb, self.b * S.volume() * dt) + # return map, lcb, ucb + + def construct_covariance_matrix_regression(self): + + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.data is not None: + variances = self.variances + + if self.feedback == "count-record": + mask = self.bucketized_counts > 0 + tau = self.total_bucketized_time + for index_o, o in enumerate(self.bucketized_obs): + n = mask[index_o] + if n > 0: + A = ( + self.varphis[index_o, :].view(-1, 1) + @ self.varphis[index_o, :].view(1, -1) + * tau[index_o] + ) + W = W + A / (variances[index_o]) + + elif self.feedback == "histogram": + + for datapoint in self.data: + (S, obs, dt) = datapoint + varphi = self.packing.integral(S) * dt + variance = varphi @ self.rate + variance = variance + A = varphi.view(-1, 1) @ varphi.view(1, -1) + W = W + A / variance + + return W 
+ torch.eye(self.get_m()).double() * self.s + + def mean_set(self, S, dt=1.0): + mu = 0 + for set in self.basic_sets: + if S.inside(set): + mu = mu + torch.exp(dt * self.packing.integral(set) @ self.rate) + return mu if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.1 - n = 64 - B = 4. - b = 0.1 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 5 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 128 - k = KernelFunction(gamma=gamma) - estimator = LogLinearRateEstimator(process, hierarchical_structure, - kernel_object=k, B=B, m=m, d=d, estimator='least-sq') - - min_vol, max_vol = estimator.get_min_max() - - dt = 1. / (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - no_samples = 0 - data = [] - samples = [] - repeats = 2 - - for i in range(no_samples): - j = np.random.randint(0, no_sets, 1) - S = Sets[j[0]] - for _ in range(repeats): - sample = process.sample_discretized(S, dt) - samples.append(sample) - data.append((S, sample, dt)) - - sample_D = process.sample_discretized(D, dt) - samples.append(sample_D) - no_samples = repeats * no_samples + 1 - data.append((D, sample_D, dt)) - - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.fit_gp() - - for set in estimator.basic_sets: - x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2) - val = estimator.mean_set(set) - plt.plot(x, x * 0 + float(val), 'b-o') - vol = process.rate_volume(set) - plt.plot(x, x * 0 + float(vol), '-o', color='orange') - for j in range(no_samples): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - process.visualize(D, samples=0, n=n, dt=1.) 
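# NOTE: every demo __main__ block in this patch feeds the estimators the same
# data layout: a list of (BorelSet, observed_points_or_None, duration) tuples,
# one per observed window, handed to load_data() before fitting. A condensed
# sketch of that driver loop using only calls that appear in this patch:
from stpy.borel_set import HierarchicalBorelSets
from stpy.point_processes.poisson import PoissonPointProcess

process = PoissonPointProcess(d=1, B=4.0, b=0.5)
hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=3)
dt = 10.0

data = []
for S in hierarchical_structure.get_sets_level(3):    # basic sets at the finest level
    sample = process.sample_discretized(S, dt)        # tensor of event locations, or None
    data.append((S, sample, dt))
# estimator = ...   any of the estimators above
# estimator.load_data(data)
# estimator.fit_gp()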
+ torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.1 + n = 64 + B = 4.0 + b = 0.1 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 5 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 128 + k = KernelFunction(gamma=gamma) + estimator = LogLinearRateEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + estimator="least-sq", + ) + + min_vol, max_vol = estimator.get_min_max() + + dt = 1.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + no_samples = 0 + data = [] + samples = [] + repeats = 2 + + for i in range(no_samples): + j = np.random.randint(0, no_sets, 1) + S = Sets[j[0]] + for _ in range(repeats): + sample = process.sample_discretized(S, dt) + samples.append(sample) + data.append((S, sample, dt)) + + sample_D = process.sample_discretized(D, dt) + samples.append(sample_D) + no_samples = repeats * no_samples + 1 + data.append((D, sample_D, dt)) + + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood based + estimator.fit_gp() + + for set in estimator.basic_sets: + x = np.linspace(set.bounds[0, 0], set.bounds[0, 1], 2) + val = estimator.mean_set(set) + plt.plot(x, x * 0 + float(val), "b-o") + vol = process.rate_volume(set) + plt.plot(x, x * 0 + float(vol), "-o", color="orange") + for j in range(no_samples): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + process.visualize(D, samples=0, n=n, dt=1.0) diff --git a/stpy/point_processes/poisson/mbr_positive_estimator.py b/stpy/point_processes/poisson/mbr_positive_estimator.py index 5924d22..886817b 100644 --- a/stpy/point_processes/poisson/mbr_positive_estimator.py +++ b/stpy/point_processes/poisson/mbr_positive_estimator.py @@ -8,355 +8,432 @@ from stpy.embeddings.embedding import HermiteEmbedding from stpy.kernels import KernelFunction from stpy.point_processes.poisson import PoissonPointProcess -from stpy.point_processes.poisson.link_fun_rate_estimator import PermanentalProcessRateEstimator +from stpy.point_processes.poisson.link_fun_rate_estimator import ( + PermanentalProcessRateEstimator, +) class MBRPositiveEstimator(PermanentalProcessRateEstimator): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - if self.feedback == "count-record": - self.varLambdas_vec = torch.zeros( - size=(self.varLambdas.size()[0], self.varLambdas.size()[1] * self.varLambdas.size()[2])).double() - for i in range(self.varLambdas.size()[0]): - self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1) - - self.approx_solver = True - - def fit_gp(self, threads=4): - if self.data is not None: - super().fit_gp(threads=threads) - else: - self.rate = None - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - emb = self.packing.embed(xtest) - mu = torch.einsum('ij,jk,ik->i', emb, self.rate, emb).view(-1, 1) - return mu - - def rate_value(self, x, dt=1): - emb = self.packing.embed(x) * dt - mu = torch.einsum('ij,jk,ik->i', emb, self.rate, emb).view(-1, 1) - return mu - - def mean_set(self, S, dt=1.): - if self.data is not None: - emb = self.product_integral(S) * dt - mu = torch.trace(emb @ self.rate).view(1, 1) - else: - mu = self.b * S.volume() - return mu - - def penalized_likelihood(self, 
threads=4): - sumLambda = self.sumLambda.numpy() - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - if self.observations is not None: - observations = self.observations.numpy() - # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro") - objective = -cp.sum(cp.log(observations @ Theta @ observations.T)) + \ - cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta)) - else: - objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares(cp.vec(Theta)) - - # if self.get_m() == 2: - # # use Lorentz-cone special result - # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )] - # else: - # constraints = [Theta >> 0] - constraints = [] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-3, - mosek.dparam.intpnt_co_tol_dfeas: 1e-3, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-3}) - self.rate = torch.from_numpy(Theta.value) - return self.rate - - def penalized_likelihood_bins(self, threads=4): - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - tau = self.total_bucketized_time[mask].clone().numpy() - varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() - - objective = -cp.sum(observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta)))) + \ - cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + self.s * cp.sum_squares(cp.vec(Theta)) - - constraints = [Theta >> 0] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-3, - mosek.dparam.intpnt_co_tol_dfeas: 1e-3, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-3}) - self.rate = torch.from_numpy(Theta.value) - return self.rate - - def least_squares_weighted(self, threads=4): - - if self.approx_fit == False: - self.bucketization() - - Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - tau = self.total_bucketized_time.clone().numpy() - - # varsumLambdas - varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() - - variances = self.variances.view(-1).clone().numpy() - - for i in range(variances.shape[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i]) - - selected_variances = variances[mask] - - objective = cp.sum_squares((varLambdas_vec @ cp.vec(Theta) + - - observations) / np.sqrt(selected_variances)) + self.s * cp.sum_squares( - cp.vec(Theta)) / 2 - constraints = [Theta >> 0] - prob = cp.Problem(cp.Minimize(objective), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-3, - mosek.dparam.intpnt_co_tol_dfeas: 1e-3, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-3}) - - self.rate = torch.from_numpy(Theta.value) - return self.rate - - def construct_covariance_matrix(self): - if self.estimator == "bins": - 
self.construct_covariance_matrix_bins() - elif self.estimator == "least-sq": - self.construct_covariance_matrix_regression() - else: - raise NotImplementedError("Covariance not implemented") - - def construct_covariance_matrix_regression(self): - varLambdas = self.varLambdas_vec.clone() - variances = self.variances - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double() - I = torch.eye(self.get_m() ** 2).double() - W_inv = self.s * torch.eye(self.get_m() ** 2).double() - - for index_o, o in enumerate(self.bucketized_obs): - n = mask[index_o] - if n > 0: - k = self.variance_correction(tau[index_o] * variances[index_o]) - v = tau[index_o] / (variances[index_o] * k) - - vec = varLambdas[index_o, :].view(-1, 1) - A = vec @ vec.T - W = W + A * v - denom = 1. + v * vec.T @ W_inv @ vec - W_inv = W_inv @ (I - v * vec @ (vec.T @ W_inv) / denom) - - self.W = W + self.s * torch.eye(self.get_m() ** 2).double() - self.W_inv = W_inv - # self.W_cholesky = torch.cholesky(self.W, upper=True) - return self.W - - def construct_covariance_matrix_bins(self): - self.construct_covariance_matrix_regression() - - def mean_var_reg_set(self, S, dt=1., beta=2., lcb_compute=False): - - if self.data is None: - return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b - - if self.approx_fit == False: - self.W = self.construct_covariance_matrix() - self.approx_fit = True - - map = None - lcb = None - - if self.approx_solver == True: - ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True) - if lcb_compute == True: - lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False) - else: - ucb = self.band(S, beta=beta, dt=dt, maximization=True) - if lcb_compute == True: - lcb = self.band(S, beta=beta, dt=dt, maximization=False) - - return map, ucb, lcb - - def mean_var_bins_set(self, S, dt=1., beta=2., lcb_compute=False): - return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute) - - def band(self, S, beta=2., dt=1., maximization=True): - emb = self.product_integral(S) * dt - A = cp.Variable((self.get_m(), self.get_m()), symmetric=True) - cost = cp.trace(A @ emb) - Z = self.W_cholesky.clone() - zero = np.zeros(self.get_m() ** 2) - constraints = [cp.SOC(zero.T @ cp.vec(A) + self.s * beta ** 2, Z @ (cp.vec(A) - cp.vec(self.rate.numpy())))] - constraints += [A >> 0] - - if maximization == True: - prob = cp.Problem(cp.Maximize(cost), constraints) - else: - prob = cp.Problem(cp.Minimize(cost), constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: 4, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-3, - mosek.dparam.intpnt_co_tol_dfeas: 1e-3, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-3}) - ucb = torch.trace(torch.from_numpy(A.value) @ emb) - return ucb - - def band_no_opt(self, S, beta=2., dt=1., maximization=True): - - if self.rate is None: - if maximization == True: - return S.volume() * dt * self.B - else: - return S.volume() * dt * self.b - else: - emb = self.product_integral(S) - cost = torch.trace(self.rate @ emb) - if maximization == True: - out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1) - else: - out = np.maximum(cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.) 
- return out * dt - - def gap(self, S, actions, w, dt, beta=2.): - """ - Estimates the gap of an action S, - :param S: - :param dt: - :return: - """ - - if self.data is None: - return (self.B - self.b) * S.volume() / w(S) - - if self.ucb_identified == False: - print("Recomputing UCB.....") - self.ucb_identified = True - self.max_ucb = -1000 - self.ucb_action = None - for action in actions: - _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0)) - ucb = ucb / w(action) - if ucb > self.max_ucb: - self.max_ucb = ucb - self.ucb_action = action - map, ucb, lcb = self.mean_var_reg_set(S, dt=dt, beta=self.beta(0), lcb_compute=True) - gap = w(S) * self.max_ucb - lcb - return gap - - def information(self, S, dt, precomputed=None): - - if self.data is None: - return 1. - - if self.W is None: - self.construct_covariance_matrix() - - if self.feedback == "count-record": - varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt - - ind = [] - for index, set in enumerate(self.basic_sets): - if S.inside(set): - ind.append(index) - Upsilon = self.varLambdas_vec[ind, :] * dt - - I = torch.eye(Upsilon.size()[0]).double() - G = self.W_inv - self.W_inv @ Upsilon.T @ torch.inverse(I + Upsilon @ Upsilon.T) @ Upsilon @ self.W_inv - return 10e-4 + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T) - torch.logdet( - varphi_UCB @ G @ varphi_UCB.T) - - elif self.feedback == "histogram": - raise NotImplementedError("Not implemented.") + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + if self.feedback == "count-record": + self.varLambdas_vec = torch.zeros( + size=( + self.varLambdas.size()[0], + self.varLambdas.size()[1] * self.varLambdas.size()[2], + ) + ).double() + for i in range(self.varLambdas.size()[0]): + self.varLambdas_vec[i, :] = self.varLambdas[i, :, :].reshape(-1) + + self.approx_solver = True + + def fit_gp(self, threads=4): + if self.data is not None: + super().fit_gp(threads=threads) + else: + self.rate = None + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + emb = self.packing.embed(xtest) + mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1) + return mu + + def rate_value(self, x, dt=1): + emb = self.packing.embed(x) * dt + mu = torch.einsum("ij,jk,ik->i", emb, self.rate, emb).view(-1, 1) + return mu + + def mean_set(self, S, dt=1.0): + if self.data is not None: + emb = self.product_integral(S) * dt + mu = torch.trace(emb @ self.rate).view(1, 1) + else: + mu = self.b * S.volume() + return mu + + def penalized_likelihood(self, threads=4): + sumLambda = self.sumLambda.numpy() + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + if self.observations is not None: + observations = self.observations.numpy() + # cost = cp.sum_squares(cp.diag(emb @ A @ emb.T) - y.view(-1).numpy()) / (self.s ** 2) + (self.lam) * cp.norm(A, "fro") + objective = ( + -cp.sum(cp.log(observations @ Theta @ observations.T)) + + cp.trace(sumLambda @ Theta) + + self.s * cp.sum_squares(cp.vec(Theta)) + ) + else: + objective = cp.trace(sumLambda @ Theta) + self.s * cp.sum_squares( + cp.vec(Theta) + ) + + # if self.get_m() == 2: + # # use Lorentz-cone special result + # constraints = [cp.SOC(Theta[0,0]+Theta[1,1],Theta[1,1] )] + # else: + # constraints = [Theta >> 0] + constraints = [] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + 
mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def penalized_likelihood_bins(self, threads=4): + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + tau = self.total_bucketized_time[mask].clone().numpy() + varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() + + objective = ( + -cp.sum( + observations @ cp.log(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + ) + + cp.sum(cp.multiply(tau, varLambdas_vec @ cp.vec(Theta))) + + self.s * cp.sum_squares(cp.vec(Theta)) + ) + + constraints = [Theta >> 0] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def least_squares_weighted(self, threads=4): + + if self.approx_fit == False: + self.bucketization() + + Theta = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + tau = self.total_bucketized_time.clone().numpy() + + # varsumLambdas + varLambdas_vec = self.varLambdas_vec[mask, :].clone().numpy() + + variances = self.variances.view(-1).clone().numpy() + + for i in range(variances.shape[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + + selected_variances = variances[mask] + + objective = ( + cp.sum_squares( + (varLambdas_vec @ cp.vec(Theta) + -observations) + / np.sqrt(selected_variances) + ) + + self.s * cp.sum_squares(cp.vec(Theta)) / 2 + ) + constraints = [Theta >> 0] + prob = cp.Problem(cp.Minimize(objective), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + + self.rate = torch.from_numpy(Theta.value) + return self.rate + + def construct_covariance_matrix(self): + if self.estimator == "bins": + self.construct_covariance_matrix_bins() + elif self.estimator == "least-sq": + self.construct_covariance_matrix_regression() + else: + raise NotImplementedError("Covariance not implemented") + + def construct_covariance_matrix_regression(self): + varLambdas = self.varLambdas_vec.clone() + variances = self.variances + mask = self.bucketized_counts > 0 + tau = self.total_bucketized_time + W = torch.zeros(size=(self.get_m() ** 2, self.get_m() ** 2)).double() + I = torch.eye(self.get_m() ** 2).double() + W_inv = self.s * torch.eye(self.get_m() ** 2).double() + + for index_o, o in enumerate(self.bucketized_obs): + n = mask[index_o] + if n > 0: + k = self.variance_correction(tau[index_o] * variances[index_o]) + v = tau[index_o] / (variances[index_o] * k) + + vec = varLambdas[index_o, :].view(-1, 1) + A = vec @ vec.T + W = W + A * v + denom = 1.0 + v * vec.T @ W_inv @ vec + W_inv = W_inv @ (I - v * 
vec @ (vec.T @ W_inv) / denom) + + self.W = W + self.s * torch.eye(self.get_m() ** 2).double() + self.W_inv = W_inv + # self.W_cholesky = torch.cholesky(self.W, upper=True) + return self.W + + def construct_covariance_matrix_bins(self): + self.construct_covariance_matrix_regression() + + def mean_var_reg_set(self, S, dt=1.0, beta=2.0, lcb_compute=False): + + if self.data is None: + return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b + + if self.approx_fit == False: + self.W = self.construct_covariance_matrix() + self.approx_fit = True + + map = None + lcb = None + + if self.approx_solver == True: + ucb = self.band_no_opt(S, beta=beta, dt=dt, maximization=True) + if lcb_compute == True: + lcb = self.band_no_opt(S, beta=beta, dt=dt, maximization=False) + else: + ucb = self.band(S, beta=beta, dt=dt, maximization=True) + if lcb_compute == True: + lcb = self.band(S, beta=beta, dt=dt, maximization=False) + + return map, ucb, lcb + + def mean_var_bins_set(self, S, dt=1.0, beta=2.0, lcb_compute=False): + return self.mean_var_reg_set(S, dt=dt, beta=beta, lcb_compute=lcb_compute) + + def band(self, S, beta=2.0, dt=1.0, maximization=True): + emb = self.product_integral(S) * dt + A = cp.Variable((self.get_m(), self.get_m()), symmetric=True) + cost = cp.trace(A @ emb) + Z = self.W_cholesky.clone() + zero = np.zeros(self.get_m() ** 2) + constraints = [ + cp.SOC( + zero.T @ cp.vec(A) + self.s * beta**2, + Z @ (cp.vec(A) - cp.vec(self.rate.numpy())), + ) + ] + constraints += [A >> 0] + + if maximization == True: + prob = cp.Problem(cp.Maximize(cost), constraints) + else: + prob = cp.Problem(cp.Minimize(cost), constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: 4, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-3, + mosek.dparam.intpnt_co_tol_dfeas: 1e-3, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-3, + }, + ) + ucb = torch.trace(torch.from_numpy(A.value) @ emb) + return ucb + + def band_no_opt(self, S, beta=2.0, dt=1.0, maximization=True): + + if self.rate is None: + if maximization == True: + return S.volume() * dt * self.B + else: + return S.volume() * dt * self.b + else: + emb = self.product_integral(S) + cost = torch.trace(self.rate @ emb) + if maximization == True: + out = cost + beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1) + else: + out = np.maximum( + cost - beta * emb.view(1, -1) @ self.W_inv @ emb.view(-1, 1), 0.0 + ) + return out * dt + + def gap(self, S, actions, w, dt, beta=2.0): + """ + Estimates the gap of an action S, + :param S: + :param dt: + :return: + """ + + if self.data is None: + return (self.B - self.b) * S.volume() / w(S) + + if self.ucb_identified == False: + print("Recomputing UCB.....") + self.ucb_identified = True + self.max_ucb = -1000 + self.ucb_action = None + for action in actions: + _, ucb, __ = self.mean_var_reg_set(action, dt=dt, beta=self.beta(0)) + ucb = ucb / w(action) + if ucb > self.max_ucb: + self.max_ucb = ucb + self.ucb_action = action + map, ucb, lcb = self.mean_var_reg_set( + S, dt=dt, beta=self.beta(0), lcb_compute=True + ) + gap = w(S) * self.max_ucb - lcb + return gap + + def information(self, S, dt, precomputed=None): + + if self.data is None: + return 1.0 + + if self.W is None: + self.construct_covariance_matrix() + + if self.feedback == "count-record": + varphi_UCB = self.product_integral(self.ucb_action).view(1, -1) * dt + + ind = [] + for index, set in enumerate(self.basic_sets): + if S.inside(set): + 
ind.append(index) + Upsilon = self.varLambdas_vec[ind, :] * dt + + I = torch.eye(Upsilon.size()[0]).double() + G = ( + self.W_inv + - self.W_inv + @ Upsilon.T + @ torch.inverse(I + Upsilon @ Upsilon.T) + @ Upsilon + @ self.W_inv + ) + return ( + 10e-4 + + torch.logdet(varphi_UCB @ self.W_inv @ varphi_UCB.T) + - torch.logdet(varphi_UCB @ G @ varphi_UCB.T) + ) + + elif self.feedback == "histogram": + raise NotImplementedError("Not implemented.") if __name__ == "__main__": - torch.manual_seed(2) - np.random.seed(2) - d = 1 - gamma = 0.2 - n = 64 - B = 4. - b = 0.5 - - process = PoissonPointProcess(d=1, B=B, b=b) - Sets = [] - levels = 3 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - Sets = hierarchical_structure.get_all_sets() - - D = BorelSet(1, bounds=torch.Tensor([[-1., 1.]]).double()) - - m = 32 - embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) - k = KernelFunction(gamma=gamma) - estimator = MBRPositiveEstimator(process, hierarchical_structure, kernel_object=k, - B=B, m=m, d=d, embedding=embedding, basis="custom") - min_vol, max_vol = estimator.get_min_max() - - dt = 10. / (b * min_vol) - dt = dt * 2 - - print("Suggested dt:", dt) - c = ['k', 'r', 'b', 'y', 'g', 'orange', 'brown', 'purple'] + ['k' for i in range(500)] - - no_sets = len(Sets) - no_samples = 0 - data = [] - samples = [] - repeats = 2 - - for i in range(no_samples): - j = np.random.randint(0, no_sets, 1) - S = Sets[j[0]] - for _ in range(repeats): - sample = process.sample_discretized(S, dt) - samples.append(sample) - data.append((S, sample, dt)) - - sample_D = process.sample_discretized(D, dt) - samples.append(sample_D) - no_samples = repeats * no_samples + 1 - data.append((D, sample_D, dt)) - - estimator.load_data(data) - - xtest = D.return_discretization(n=n) - - # likelihood based - estimator.penalized_likelihood() - rate_mean = estimator.mean_rate(D, n=n) - - # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) - - for j in range(no_samples): - if samples[j] is not None: - plt.plot(samples[j], samples[j] * 0, 'o', color=c[j]) - - plt.plot(xtest, rate_mean, label='likelihood - locations known') - # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, - # color='blue', label='triangle') - process.visualize(D, samples=0, n=n, dt=1.) 
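+    # Demo below: seed the RNGs, build a 1-D PoissonPointProcess on [-1, 1] with a
+    # hierarchical set structure, fit the quadratic-form MBR rate model via
+    # penalized_likelihood(), then plot the fitted mean rate against the sampled
+    # events and the true rate (process.visualize).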
+ torch.manual_seed(2) + np.random.seed(2) + d = 1 + gamma = 0.2 + n = 64 + B = 4.0 + b = 0.5 + + process = PoissonPointProcess(d=1, B=B, b=b) + Sets = [] + levels = 3 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + Sets = hierarchical_structure.get_all_sets() + + D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + + m = 32 + embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) + k = KernelFunction(gamma=gamma) + estimator = MBRPositiveEstimator( + process, + hierarchical_structure, + kernel_object=k, + B=B, + m=m, + d=d, + embedding=embedding, + basis="custom", + ) + min_vol, max_vol = estimator.get_min_max() + + dt = 10.0 / (b * min_vol) + dt = dt * 2 + + print("Suggested dt:", dt) + c = ["k", "r", "b", "y", "g", "orange", "brown", "purple"] + [ + "k" for i in range(500) + ] + + no_sets = len(Sets) + no_samples = 0 + data = [] + samples = [] + repeats = 2 + + for i in range(no_samples): + j = np.random.randint(0, no_sets, 1) + S = Sets[j[0]] + for _ in range(repeats): + sample = process.sample_discretized(S, dt) + samples.append(sample) + data.append((S, sample, dt)) + + sample_D = process.sample_discretized(D, dt) + samples.append(sample_D) + no_samples = repeats * no_samples + 1 + data.append((D, sample_D, dt)) + + estimator.load_data(data) + + xtest = D.return_discretization(n=n) + + # likelihood based + estimator.penalized_likelihood() + rate_mean = estimator.mean_rate(D, n=n) + + # _, lcb, ucb = estimator.map_lcb_ucb(D, n, beta=2.) + + for j in range(no_samples): + if samples[j] is not None: + plt.plot(samples[j], samples[j] * 0, "o", color=c[j]) + + plt.plot(xtest, rate_mean, label="likelihood - locations known") + # plt.fill_between(xtest.numpy().flatten(), lcb.numpy().flatten(), ucb.numpy().flatten(), alpha=0.4, + # color='blue', label='triangle') + process.visualize(D, samples=0, n=n, dt=1.0) diff --git a/stpy/point_processes/poisson/poisson.py b/stpy/point_processes/poisson/poisson.py index 4228b1f..843602c 100644 --- a/stpy/point_processes/poisson/poisson.py +++ b/stpy/point_processes/poisson/poisson.py @@ -5,152 +5,190 @@ from stpy.borel_set import BorelSet -class PoissonPointProcess(): - """ - parametrized by log linear model - - """ - - def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None): - self.B = B - self.d = d - self.b = b - if rate is None: - self.rate = self.rate_default - else: - self.rate = rate - - self.rate_volume_f = rate_volume - self.exact = True - - def rate_default(self, x, dt=1.): - return (self.B * torch.sum(torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1).view(-1, - 1) + self.b) * dt - - def rate_volume(self, S, dt=1, rate=None): - if self.rate_volume_f is None: - # integrate rate numerically over S - import scipy.integrate as integrate - if rate is None: - rate = self.rate - else: - rate = rate - integral = 0 - if self.d == 1: - # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) - integral, _ = integrate.quad(lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(), - float(S.bounds[0, 0]), float(S.bounds[0, 1])) - elif self.d == 2: - integrand = lambda x, y: rate(torch.Tensor([x, y]).view(1, 2).double()).numpy() - integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1])) - - return integral * dt - else: - return self.rate_volume_f(S) * dt - - def sample_discretized(self, S, dt, n=100): - lam = np.maximum(float(self.rate_volume(S, dt)), 0) - count = 
np.random.poisson(lam=lam) - if count > 0: - x = S.return_discretization(n) - r = self.rate(x) * dt - r = torch.maximum(r, r * 0) - sample = torch.from_numpy( - np.random.choice(np.arange(0, x.size()[0], 1), size=count, p=(r / torch.sum(r)).numpy().reshape(-1))) - return x[sample, :] - else: - return None - - def sample_discretized_direct(self, x, val): - lam = 1000 - count = np.random.poisson(lam=np.maximum(0, lam)) - if count > 0: - val = torch.abs(val) - sample = torch.from_numpy(np.random.choice(np.arange(0, x.size()[0], 1), - size=count, p=(val / torch.sum(val)).numpy().reshape(-1))) - return x[sample, :] - else: - return None - - def sample(self, S, dt=1., verbose=False, rate=None): - """ - - :param S: set where it should be sampled - :return: - """ - if self.exact == True: - return self.sample_discretized(S, dt=dt) - else: - - lam = self.rate_volume(S, dt) - n = np.random.poisson(lam=lam) - print("Number of events:", n) - alpha = 1. - - new_sample = [] - size = 0 - while size < n: - # uniform sample g(s) = 1/vol(S) - sample = S.uniform_sample(1) - t = self.rate(sample) / (alpha) - p = np.random.uniform(0, 1) - if p < t: - new_sample.append(sample.view(1, -1)) - size = size + 1 - - if len(new_sample) > 1: - x = torch.cat(new_sample, dim=0) - else: - return None - return x - - def rate_sets(self, Sets, dt=1): - res = [] - for S in Sets: - res.append(self.rate_volume(S, dt=dt)) - return res - - def visualize(self, S, samples=2, n=10, dt=1., show=True): - xtest = S.return_discretization(n) - rate = self.rate(xtest) - - if self.d == 1: - plt.plot(xtest, rate, label='rate', lw=3) - for i in range(samples): - - x = self.sample(S, dt=dt) - if x is not None: - n = x.size()[0] - plt.plot(x, x * 0, 'o', label='sample n=' + str(n)) - - elif self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu, label='rate') - ax.contour(cs, colors='k') - - for i in range(samples): - x = self.sample(S, dt=dt) - if x is not None: - ax.plot(x[:, 0].detach().numpy(), x[:, 1].detach().numpy(), 'o', ms=10, alpha=0.5, label='sample') - ax.grid(c='k', ls='-', alpha=0.1) - plt.colorbar(cs) - - plt.legend() - if show == True: - plt.show() +class PoissonPointProcess: + """ + parametrized by log linear model + + """ + + def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None): + self.B = B + self.d = d + self.b = b + if rate is None: + self.rate = self.rate_default + else: + self.rate = rate + + self.rate_volume_f = rate_volume + self.exact = True + + def rate_default(self, x, dt=1.0): + return ( + self.B + * torch.sum( + torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1 + ).view(-1, 1) + + self.b + ) * dt + + def rate_volume(self, S, dt=1, rate=None): + if self.rate_volume_f is None: + # integrate rate numerically over S + import scipy.integrate as integrate + + if rate is None: + rate = self.rate + else: + rate = rate + integral = 0 + if self.d == 1: + # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) + integral, _ = integrate.quad( + lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(), + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + ) + elif self.d == 2: + integrand = lambda x, y: rate( + torch.Tensor([x, y]).view(1, 2).double() + ).numpy() + 
integral, _ = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + ) + + return integral * dt + else: + return self.rate_volume_f(S) * dt + + def sample_discretized(self, S, dt, n=100): + lam = np.maximum(float(self.rate_volume(S, dt)), 0) + count = np.random.poisson(lam=lam) + if count > 0: + x = S.return_discretization(n) + r = self.rate(x) * dt + r = torch.maximum(r, r * 0) + sample = torch.from_numpy( + np.random.choice( + np.arange(0, x.size()[0], 1), + size=count, + p=(r / torch.sum(r)).numpy().reshape(-1), + ) + ) + return x[sample, :] + else: + return None + + def sample_discretized_direct(self, x, val): + lam = 1000 + count = np.random.poisson(lam=np.maximum(0, lam)) + if count > 0: + val = torch.abs(val) + sample = torch.from_numpy( + np.random.choice( + np.arange(0, x.size()[0], 1), + size=count, + p=(val / torch.sum(val)).numpy().reshape(-1), + ) + ) + return x[sample, :] + else: + return None + + def sample(self, S, dt=1.0, verbose=False, rate=None): + """ + + :param S: set where it should be sampled + :return: + """ + if self.exact == True: + return self.sample_discretized(S, dt=dt) + else: + + lam = self.rate_volume(S, dt) + n = np.random.poisson(lam=lam) + print("Number of events:", n) + alpha = 1.0 + + new_sample = [] + size = 0 + while size < n: + # uniform sample g(s) = 1/vol(S) + sample = S.uniform_sample(1) + t = self.rate(sample) / (alpha) + p = np.random.uniform(0, 1) + if p < t: + new_sample.append(sample.view(1, -1)) + size = size + 1 + + if len(new_sample) > 1: + x = torch.cat(new_sample, dim=0) + else: + return None + return x + + def rate_sets(self, Sets, dt=1): + res = [] + for S in Sets: + res.append(self.rate_volume(S, dt=dt)) + return res + + def visualize(self, S, samples=2, n=10, dt=1.0, show=True): + xtest = S.return_discretization(n) + rate = self.rate(xtest) + + if self.d == 1: + plt.plot(xtest, rate, label="rate", lw=3) + for i in range(samples): + + x = self.sample(S, dt=dt) + if x is not None: + n = x.size()[0] + plt.plot(x, x * 0, "o", label="sample n=" + str(n)) + + elif self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), rate[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu, label="rate") + ax.contour(cs, colors="k") + + for i in range(samples): + x = self.sample(S, dt=dt) + if x is not None: + ax.plot( + x[:, 0].detach().numpy(), + x[:, 1].detach().numpy(), + "o", + ms=10, + alpha=0.5, + label="sample", + ) + ax.grid(c="k", ls="-", alpha=0.1) + plt.colorbar(cs) + + plt.legend() + if show == True: + plt.show() if __name__ == "__main__": - d = 2 - n = 100 - bounds = torch.Tensor([[-1, 1], [-1, 1]]).double() - D = BorelSet(d, bounds) + d = 2 + n = 100 + bounds = torch.Tensor([[-1, 1], [-1, 1]]).double() + D = BorelSet(d, bounds) - process = PoissonPointProcess(d=d, B=2) - process.visualize(D, samples=10, n=n, dt=10) + process = PoissonPointProcess(d=d, B=2) + process.visualize(D, samples=10, n=n, dt=10) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index dbbb1c5..ef33832 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -7,9 +7,16 
@@ from quadprog import solve_qp from torchmin import minimize as minimize_torch -from stpy.embeddings.bernstein_embedding import BernsteinEmbedding, BernsteinSplinesEmbedding, \ - BernsteinSplinesOverlapping -from stpy.embeddings.bump_bases import PositiveNystromEmbeddingBump, TriangleEmbedding, FaberSchauderEmbedding +from stpy.embeddings.bernstein_embedding import ( + BernsteinEmbedding, + BernsteinSplinesEmbedding, + BernsteinSplinesOverlapping, +) +from stpy.embeddings.bump_bases import ( + PositiveNystromEmbeddingBump, + TriangleEmbedding, + FaberSchauderEmbedding, +) from stpy.embeddings.optimal_positive_basis import OptimalPositiveBasis from stpy.helpers.ellipsoid_algorithms import maximize_on_elliptical_slice from stpy.point_processes.rate_estimator import RateEstimator @@ -17,1927 +24,2473 @@ class PoissonRateEstimator(RateEstimator): - def __init__(self, process, hierarchy, d=1, m=100, kernel_object=None, B=1., s=1., jitter=10e-8, b=0., - basis='triangle', estimator='likelihood', feedback='count-record', offset=0.1, uncertainty='laplace', - approx=None, stepsize=None, embedding=None, beta=2., sampling='proximal+prox', peeking=True, - constraints=True, var_cor_on=True, - samples_nystrom=15000, inverted_constraint=False, steps=None, dual=True, no_anchor_points=1024, U=1., - opt='torch'): - - self.process = process - self.d = d - self.s = s - self.b = b - self.B = B - self.U = U - self.stepsize = stepsize - self.sampling = sampling - self.steps = steps - self.opt = opt - self.kernel_object = kernel_object - # set hierarchy - self.constraints = constraints - self.hierarchy = hierarchy - self.ucb_identified = False - self.inverted_constraint = inverted_constraint - # approximation - self.loglikelihood = 0. - self.dual = dual - self.peeking = peeking - self.no_anchor_points = no_anchor_points - if beta < 0.: - self.beta = lambda t: self.beta_theory() - else: - self.beta = lambda t: beta - self.var_cor_on = var_cor_on - - if basis == 'triangle': - self.packing = TriangleEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter)) - elif basis == 'bernstein': - self.packing = BernsteinEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter)) - elif basis == 'splines': - self.packing = BernsteinSplinesEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter)) - elif basis == 'nystrom': - self.packing = PositiveNystromEmbeddingBump(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter), samples=samples_nystrom) - elif basis == 'overlap-splines': - self.packing = BernsteinSplinesOverlapping(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter)) - elif basis == 'faber': - self.packing = FaberSchauderEmbedding(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter)) - elif basis == "optimal-positive": - self.packing = OptimalPositiveBasis(d, m, kernel_object=kernel_object, B=B, b=b, offset=offset, - s=np.sqrt(jitter), samples=samples_nystrom) - elif basis == "custom": - self.packing = embedding - else: - raise NotImplementedError("The request positive basis is not implemented.") - self.m = m - self.data = None - self.covariance = False - - # stabilizing the matrix inversion - self.jitter = jitter - - # for variance stabilization - self.stabilization = None - self.approx_fit = False - - # properties of rate estimator - self.estimator = estimator - self.feedback = feedback - self.uncertainty = uncertainty - self.approx = 
approx - - # precompute information - self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) - - self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double() - self.variances = torch.ones(size=(len(self.basic_sets), 1)).double().view(-1) - self.variances_histogram = [] - self.observations = None - self.rate = None - self.W = (s) * torch.eye(self.get_m()).double() - self.W_inv_approx = (1. / s) * torch.eye(self.get_m()).double() - self.beta_value = 2. - self.sampled_theta = None - - if self.dual == True: - if self.d == 1: - anchor = no_anchor_points - self.anchor_points = self.hierarchy.top_node.return_discretization(anchor) - self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1) - elif self.d == 2: - anchor = no_anchor_points - self.anchor_points = self.hierarchy.top_node.return_discretization(int(np.sqrt(anchor))) - self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1) - self.global_dt = 0. - self.anchor_points_emb = self.packing.embed(self.anchor_points) - - if feedback == "count-record" and basis != "custom": - print("Precomputing phis.") - for index_set, set in enumerate(self.basic_sets): - self.varphis[index_set, :] = self.packing.integral(set) - self.variances[index_set] = set.volume() * self.B - else: - pass - - print("Precomputation finished.") - - def add_data_point(self, new_data, times=True): - - super().add_data_point(new_data, times=times) - - if self.rate is not None: - rate = self.rate - else: - l, _, u = self.get_constraints() - Gamma_half = self.cov() - rate = Gamma_half @ u - - if self.feedback == 'histogram': - val = self.packing.integral(new_data[0]) @ rate * new_data[2] - v = - np.log(val) + val - - elif self.feedback == 'count-record': - v = self.packing.integral(new_data[0]) @ rate * new_data[2] - if new_data[1] is not None: - val2 = self.packing.embed(new_data[1]) @ rate * new_data[2] - v = v - torch.sum(np.log(val2)) - - self.loglikelihood += v - - def beta_theory(self): - if self.approx_fit == False: - l, Lambda, u = self.get_constraints() - Gamma_half, invGamma_half = self.cov(inverse=True) - - ## norm - norm = self.s - - ## constraints - eps = 10e-3 - res = Gamma_half @ self.rate.view(-1, 1) - torch.from_numpy(l).view(-1, 1) - xi = res.clone() - xi[res > eps] = 0. - - constraint = xi.T @ Gamma_half @ self.W_inv_approx @ Gamma_half.T @ xi - - ## concentration - vol = 4 * np.log(1. 
/ 0.1) + torch.logdet(self.W) - self.get_m() * np.log(self.s) - self.beta_value = np.sqrt(norm + vol + constraint) - print('-------------------') - print("New beta:", self.beta_value) - print("norm:", norm) - print("constraint:", constraint) - print("vol:", vol) - print("-------------------") - else: - pass - return self.beta_value - - def get_constraints(self): - return self.packing.get_constraints() - - def cov(self, inverse=False): - return self.packing.cov(inverse=inverse) - - def fit_gp(self, threads=4): - - if self.data is not None: - if self.feedback == "count-record": - - if self.estimator == "likelihood": - if self.opt == 'cvxpy': - self.penalized_likelihood(threads=threads) - elif self.opt == 'torch': - self.penalized_likelihood_fast(threads=threads) - else: - raise NotImplementedError("The optimization method does not exist") - - elif self.estimator == "least-sq": - self.least_squares_weighted() - - elif self.estimator == "bins": - self.penalized_likelihood_bins() - - else: - raise AssertionError("wrong name.") - - - elif self.feedback == 'histogram': - - if self.estimator == "likelihood": - self.penalized_likelihood_integral() - - elif self.estimator == "least-sq": - self.least_squares_weighted_integral() - - elif self.estimator == "bins": - self.penalized_likelihood_integral_bins() - - else: - raise AssertionError("wrong name.") - else: - raise AssertionError("wrong name.") - else: - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov() - self.rate = l - - def sample_mirror_langevin(self, steps=500, verbose=False): - - l, Lambda, u = self.get_constraints() - Gamma_half, invGamma_half = self.cov(inverse=True) - - v = torch.from_numpy((u + l) / 2.).view(-1, 1) - S = torch.diag(torch.from_numpy(u - l).view(-1) / 2.).double() - - phis = self.phis.clone() @ invGamma_half - - if self.observations is not None: - obs = self.observations @ invGamma_half - else: - obs = None - - invGamma = invGamma_half.T @ invGamma_half - transform = lambda y: S @ torch.tanh(y) + v - - if self.feedback == "count-record" and self.dual == False: - if obs is not None: - func = lambda y: -torch.sum(torch.log(obs @ transform(y)).view(-1)) \ - + torch.sum(phis @ transform(y)) \ - + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum( - torch.log(1. / (1. - transform(y) ** 2))) - else: - func = lambda y: torch.sum(phis @ transform(y)) \ - + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum( - torch.log(1. / (1. - transform(y) ** 2))) # torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) - - - elif self.feedback == "count-record" and self.dual == True: - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] @ invGamma_half - tau = self.total_bucketized_time[mask] - - if obs is not None: - obs = self.anchor_points_emb @ invGamma_half - weights = self.anchor_weights - mask = weights > 0. - - func = lambda y: -torch.sum(weights[mask].view(-1, 1) * torch.log(obs[mask, :] @ transform(y))) \ - + torch.sum(tau.view(-1, 1) * (phis @ transform(y))) \ - + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum( - torch.log(1. / (1. - (transform(y) ** 2)))) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) - else: - func = lambda y: torch.sum(tau.view(-1, 1) * (phis @ transform(y))) \ - + self.s * transform(y).T @ invGamma @ transform(y) + torch.sum( - torch.log(1. / (1. 
- transform(y) ** 2))) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) - - elif self.feedback == "histogram": - func = lambda y: - torch.sum( - self.counts.clone().view(-1) * torch.log(phis @ (S @ torch.tanh(y) + v)).view(-1)) \ - + torch.sum(phis @ (S @ torch.tanh(y) + v)) \ - + self.s * (S @ torch.tanh(y) + v).T @ invGamma @ (S @ torch.tanh(y) + v) - - y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True) - - # initiallize with map sqeezed more - y.data = Gamma_half @ self.rate.view(-1, 1) # u < theta < l - - u_new = u + 0.01 - l_new = l - 0.01 - v2 = torch.from_numpy((u_new + l_new) / 2.).view(-1, 1) - S2 = torch.diag(torch.from_numpy(u_new - l_new).view(-1) / 2.).double() - # - y.data = torch.inverse(S2) @ (y.data - v2) - y.data = torch.atanh(y.data) - - W = S.T @ invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half @ S - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-8)) - eta = 0.05 / (L + 1) - - print("Eta:", eta) - - for k in range(steps): - - w = torch.randn(size=(self.get_m(), 1)).double() - nabla_y = torch.autograd.functional.jacobian(func, y).data[0, 0, :, :] - y.data = y.data - eta * nabla_y + np.sqrt(2 * eta) * w - theta = torch.tanh(y).detach() - - if verbose == True: - print("Iter:", k, (S @ theta + v).T) - print(y.T) - - self.sampled_theta = invGamma_half @ transform(y.data) - - def sample_projected_langevin(self, steps=300, verbose=False, stepsize=None): - """ - :param burn_in: - :return: - """ - - Gamma_half = self.packing.cov() - - def prox(x): - z = x.numpy() - theta = cp.Variable((self.get_m(), 1)) - objective = cp.Minimize(cp.sum_squares(z - theta)) - constraints = [] - l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half.numpy() - constraints.append(Lambda @ theta >= l.reshape(-1, 1)) - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.OSQP, warm_start=False, verbose=False, eps_abs=1e-3, eps_rel=1e-3) - return torch.from_numpy(theta.value) - - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1), - self.observations).view(-1, 1) + \ - torch.sum(self.phis, dim=0).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - - elif self.feedback == "count-record" and self.dual == True: - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] - - if self.observations is not None: - obs = self.anchor_points_emb - weights = self.anchor_weights - mask = weights > 0. - nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)), - obs[mask]).view(-1, 1) + \ - torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - - - elif self.feedback == "histogram": - nabla = lambda theta: -torch.sum(torch.diag((1. 
/ (self.phis @ theta).view(-1)) * self.counts) @ self.phis, - dim=0).view(-1, 1) \ - + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - - theta = self.rate.view(-1, 1) - W = self.construct_covariance_matrix_laplace(minimal=True) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5)) - - if stepsize is None: - eta = 0.5 / (L + 1) - else: - eta = np.minimum(1, stepsize * 0.5 / L) - - print(eta) - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() - theta = prox(theta - eta * nabla(theta) + np.sqrt(2 * eta) * w) - - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = theta - - def sample_proximal_langevin_prox(self, steps=300, verbose=False, stepsize=None): - """ - :param burn_in: - :return: - """ - - Gamma_half, invGamma_half = self.packing.cov(inverse=True) - # invGamma = invGamma_half.T @ invGamma_half - l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half.numpy() - - def prox(x): - res = solve_qp(np.eye(self.get_m()), x.numpy().reshape(-1), C=Gamma_half.numpy(), b=l.numpy(), - factorized=True) - return torch.from_numpy(res[0]).view(-1, 1) - - # theta_n = cp.Variable((self.get_m(), 1)) - # x = cp.Parameter((self.get_m(), 1)) - # objective = cp.Minimize(cp.sum_squares(x - theta_n)) - # - # constraints = [] - # l, Lambda, u = self.get_constraints() - # Lambda = Lambda @ Gamma_half.numpy() - # constraints.append(Lambda @ theta_n >= l.reshape(-1, 1)) - # constraints.append(Lambda @ theta_n <= u.reshape(-1, 1)) - # - # prob = cp.Problem(objective, constraints) - - # def prox(x): - # return Gamma_half @ torch.from_numpy(scipy.optimize.nnls(invGamma.numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1) - - if self.data is not None: - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1), - self.observations).view(-1, 1) + \ - torch.sum(self.phis, dim=0).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - - elif self.feedback == "count-record" and self.dual == True: - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] - - if self.observations is not None: - obs = self.anchor_points_emb - weights = self.anchor_weights - mask = weights > 0. - nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)), - obs[mask]).view(-1, 1) + \ - torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - - - elif self.feedback == "histogram": - nabla = lambda theta: -torch.sum( - torch.diag((1. 
/ (self.phis @ theta).view(-1)) * self.counts) @ self.phis, - dim=0).view(-1, 1) \ - + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - else: - nabla = lambda theta: self.s * theta.view(-1, 1) - - if self.rate is not None: - theta = self.rate.view(-1, 1) - else: - theta = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False).view( - -1, 1) ** 2 - - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() - - # calculate proper step-size - W = self.construct_covariance_matrix_laplace(theta=theta) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - if stepsize is not None: - eta = 0.5 * stepsize / L - else: - eta = 0.5 / L - - # prox calculate - # x.value = theta.numpy() - # prob.solve(solver=cp.OSQP, warm_start=True, verbose=False, eps_abs=1e-3, eps_rel=1e-3) - # proximal_theta = torch.from_numpy(theta_n.value) - - # update step - # theta = 0.5 * theta - eta * nabla(theta) + 0.5 * proximal_theta + np.sqrt(2 * eta) * w - - # update step - theta = 0.5 * theta - eta * nabla(theta) + 0.5 * prox(theta) + np.sqrt(2 * eta) * w - if verbose == True: - print("Iter:", k, theta.T) - - self.sampled_theta = prox(theta) - - def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False): - - Gamma_half, invGamma_half = self.packing.cov(inverse=True) - l, Lambda, u = self.get_constraints() - prox_simple = lambda x: torch.minimum(torch.maximum(x.view(-1), torch.from_numpy(l).view(-1)) \ - , torch.from_numpy(u).view(-1)).view(-1, 1) - - def prox(x): - return invGamma_half @ prox_simple(Gamma_half @ x) - - phis = self.phis - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - obs = self.observations - - func = lambda y: -torch.sum(torch.log(obs @ y)) \ - + torch.sum((phis @ y)) \ - + self.s * y.T @ y - - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (obs @ y).view(-1), obs).view(-1, 1) + \ - torch.sum(phis, dim=0).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - func = lambda y: torch.sum(phis @ y).view(-1, 1) \ - + self.s * y.T @ y - - nabla = lambda y: torch.sum(phis, dim=0).view(-1, 1) + self.s * y.view(-1, 1) - - - - - - elif self.feedback == "count-record" and self.dual == True: - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] - - if self.observations is not None: - obs = self.anchor_points_emb - weights = self.anchor_weights - mask = weights > 0. 
- func = lambda y: -torch.sum(weights[mask].view(-1, 1) * torch.log(obs[mask, :] @ y)) \ - + torch.sum(tau.view(-1, 1) * (phis @ y)) \ - + self.s * y.T @ y - - nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)), - obs[mask]).view(-1, 1) + \ - torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - func = lambda y: torch.sum(tau.view(-1, 1) * (phis @ y)) \ - + self.s * y.T @ y - - nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - - elif self.feedback == "histogram": - func = lambda y: - torch.sum(self.counts.view(-1) * torch.log(phis @ y).view(-1)) + \ - torch.sum(phis @ y) \ - + self.s * y.T @ y - nabla = lambda y: -torch.einsum('i,ij->j', self.counts.view(-1) / (phis @ y).view(-1), phis).view(-1, 1) + \ - torch.sum(phis, dim=0).view(-1, 1) + self.s * y - - # hessian = lambda y: self.construct_covariance_matrix_laplace() - - y = prox(torch.randn(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True)) - y.data = self.rate.view(-1, 1) - - W = self.construct_covariance_matrix_laplace() - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5)) - - eta = 0.5 / (L + 1) - - for k in range(steps): - W = torch.randn(size=(self.get_m(), 1)).double() - nabla_y = nabla(y.data) - y.data = (1 - eta) * y.data - eta * nabla_y + eta * prox(y.data) + np.sqrt(2 * eta) * W - if verbose == True: - print("Iter:", k, y.T) - print("grad:", y.grad.T) - - self.sampled_theta = prox(y.detach()) - - def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=None): - - if self.data is not None: - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (self.observations @ y).view(-1), - self.observations).view(-1, 1) + \ - torch.sum(self.phis, dim=0).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda theta: torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - - elif self.feedback == "count-record" and self.dual == True: - - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] - - if self.observations is not None: - obs = self.anchor_points_emb - weights = self.anchor_weights - mask = weights > 0. - nabla = lambda y: -torch.einsum('i,ij->j', weights[mask] / ((obs[mask, :] @ y).view(-1)), - obs[mask]).view(-1, 1) + \ - torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - else: - nabla = lambda y: torch.einsum('i,ij->j', tau, phis).view(-1, 1) \ - + self.s * y.view(-1, 1) - - - elif self.feedback == "histogram": - nabla = lambda theta: -torch.sum( - torch.diag((1. 
/ (self.phis @ theta).view(-1)) * self.counts) @ self.phis, - dim=0).view(-1, 1) \ - + torch.sum(self.phis, dim=0).view(-1, 1) + self.s * theta.view(-1, 1) - else: - nabla = lambda theta: self.s * theta.view(-1, 1) - - Gamma_half = self.packing.cov() - lz, Lambda, u = self.get_constraints() - - Lambda = torch.from_numpy(Lambda) @ Gamma_half - y = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1) ** 2 - - if self.rate is not None: - y.data = self.rate.data + Gamma_half @ y.data - else: - y.data = Gamma_half @ y.data - - if verbose == True: - print("initial point") - print(y.data) - - W = self.construct_covariance_matrix_laplace() - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5)) - - if stepsize is None: - eta = 1. / (L + 1) - else: - eta = stepsize / (L + 1) - - D = lambda x: torch.diag(1. / torch.abs(Lambda @ x).view(-1)) - sqrt_hessian = lambda x: Lambda @ D(x) - - phi = lambda x: -torch.sum(torch.log(Lambda @ x)) - nabla_phi = lambda x: -torch.einsum('i,ij->j', 1. / (Lambda @ x).view(-1), Lambda) - hessian_phi = lambda x: Lambda.T @ torch.diag(1. / (Lambda @ x).view(-1) ** 2) @ Lambda - - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() - nabla_val = nabla(y) - H = sqrt_hessian(y.data) - z = nabla_phi(y.data).view(-1, 1) - eta * nabla_val + np.sqrt(2 * eta) * H @ w - - # y.data = newton_solve(lambda s: nabla_phi(s).reshape(-1)-z.data.reshape(-1),y.reshape(-1), - # verbose = verbose, grad = hessian_phi).view(-1,1) - - # # minimization appraoch - def objective(s): - return torch.sum((nabla_phi(s).reshape(-1) - z.reshape(-1)) ** 2) - - # # - - # x0 = y.reshape(-1).clone().detach().numpy() - # res = minimize(objective, x0, backend='torch', method='Newton-CG', precision='float64', tol=1e-5, hvp_type='vhp') - # y.data = torch.from_numpy(res.x) - - x0 = y.reshape(-1).clone() - res = minimize_torch(objective, x0, method='newton-cg', tol=1e-5) - y.data = res.x - - if verbose: - print("Iter:", k) - print(y.T) - - self.sampled_theta = y.data - - def sample_mla_prime(self, steps=100, verbose=False, stepsize=None): - Gamma_half, invGamma_half = self.packing.cov(inverse=True) - invGamma = invGamma_half.T @ invGamma_half - l, Lambda, u = self.get_constraints() - Lambda = torch.from_numpy(Lambda) @ Gamma_half - - if self.data is not None: - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - observations = self.observations @ invGamma_half - phis = self.phis @ invGamma_half - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1), - observations).view(-1, 1) + \ - torch.sum(phis, dim=0).view(-1, 1) \ - + self.s * invGamma @ y.view(-1, 1) - else: - nabla = lambda theta: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view(-1, 1) - - else: - nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1) - - y = self.b + 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).reshape(-1, - 1) ** 2 - # if self.rate is not None: - # y.data = Gamma_half @ self.rate.data.view(-1,1) + y.data - # else: - y.data = y.data - - if verbose == True: - print("initial point") - print(y.data) - - W = invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5)) - - if stepsize is None: - eta = 1. 
/ (L + 1) - else: - eta = stepsize / (L + 1) - - from stpy.approx_inference.sampling_helper import get_increment - for k in range(steps): - - nabla_val = nabla(y) - - # cvxpy minimization - # x = cp.Variable((self.get_m(), 1)) - # objective = cp.Minimize( eta * nabla_val.detach().numpy().T @ x - cp.sum(cp.log(x)) -(-1./y.data).T@x) - # constraints = [x >= 0.] - # - # prob = cp.Problem(objective, constraints) - # prob.solve(solver = cp.MOSEK) - - w0 = (eta * nabla_val.data + 1. / y.data) - # initial point for the solve - # w0 = -1./( torch.from_numpy(x.value)) - - # simulate - f = lambda w, n: n / torch.abs(w) - w = get_increment(eta, 1000, f, w0, path=False) - - # back mirror map - y.data = (-1. / w) - - if verbose: - print("Iter:", k) - print(y.T) - - self.sampled_theta = invGamma_half @ y.data - - def sample_hessian_positive_langevin_2(self, steps=500, verbose=False, stepsize=None, preconditioner=True): - - Gamma_half, invGamma_half = self.packing.cov(inverse=True) - invGamma = invGamma_half @ invGamma_half - if self.data is not None: - - if self.feedback == "count-record" and self.dual == False: - - observations = self.observations @ invGamma_half - phis = self.phis @ invGamma_half - - if self.observations is not None: - nabla = lambda y: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1), - observations).view(-1, 1) + \ - torch.sum(phis, dim=0).view(-1, 1) \ - + self.s * invGamma @ y.view(-1, 1) - else: - nabla = lambda theta: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view(-1, 1) - - else: - nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1) - - y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1) ** 2 - # if self.rate is not None: - # y.data = Gamma_half @ self.rate.data + y.data - - if verbose == True: - print("initial point") - print(y.data) - - W = self.construct_covariance_matrix_laplace(minimal=True) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-5)) - - if stepsize is None: - eta = 1. / (L + 1) - else: - eta = stepsize / (L + 1) - - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() / torch.abs(y.data).view(-1, 1) - nabla_val = nabla(y) - z = -1. / y.data.view(-1, 1) + self.b - eta * Gamma_half @ nabla_val + np.sqrt(2 * eta) * Gamma_half @ w - y.data = -1. / z + self.b - - if verbose: - print("Iter:", k) - print(y.T) - - self.sampled_theta = invGamma_half @ y.data - - def sample_newton_langevin(self, steps=1000, stepsize=None, verbose=False): - Gamma_half, invGamma_half = self.packing.cov(inverse=True) - invGamma = invGamma_half @ invGamma_half - if self.data is not None: - - if self.feedback == "count-record" and self.dual == False: - - observations = self.observations @ invGamma_half - phis = self.phis @ invGamma_half - - if self.observations is not None: - nabla = lambda y, bar: -torch.einsum('i,ij->j', 1. / (observations @ y).view(-1), - observations).view(-1, 1) + \ - torch.sum(phis, dim=0).view(-1, 1) \ - + self.s * invGamma @ y.view(-1, 1) - bar * 1. / y - else: - nabla = lambda theta, bar: torch.sum(phis, dim=0).view(-1, 1) + self.s * invGamma @ theta.view( - -1, 1) - bar * 1. / theta - - else: - nabla = lambda theta, bar: self.s * invGamma @ theta.view(-1, 1) - bar * 1. / theta - - y = 0.05 * torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True).view(-1, 1) ** 2 - - barrier = 10. 
- # hessian = lambda theta,bar: torch.einsum('ik,k,kj->ij',observations.T,(observations@theta).view(-1),observations) + invGamma + bar/theta**2 - hessian = lambda theta, bar: observations.T @ torch.diag( - 1 / (observations @ theta).view(-1) ** 2) @ observations + invGamma + torch.diag(bar / theta.view(-1) ** 2) - hessian_sqrt = lambda theta, bar: torch.cholesky(hessian(theta, bar)) - eta = 1. - - for k in range(steps): - w = torch.randn(size=(self.get_m(), 1)).double() - nabla_val = nabla(y, barrier) - y.data = y.data - torch.linalg.solve(hessian(y.data, barrier), nabla_val) + np.sqrt( - 2 * eta) * torch.linalg.solve(hessian_sqrt(y.data, barrier), w) - - if verbose: - print("Iter:", k) - print(y.T) - - self.sampled_theta = invGamma_half @ y.data - - # self.sampled_theta = y.data - - def sample_hmc(self, steps=1000, stepsize=None, verbose=False): - import hamiltorch - phis = self.phis - if self.feedback == "count-record" and self.dual == False: - if self.observations is not None: - obs = self.observations - func = lambda y: torch.sum(torch.log(obs @ y)) \ - - torch.sum((phis @ y)) \ - - self.s * y.T @ y - else: - func = lambda y: - torch.sum(phis @ y).view(-1, 1) \ - - self.s * y.T @ y - - num_samples = 1 - num_steps_per_sample = steps - if stepsize is None: - step_size = 1e-8 - else: - step_size = stepsize - - params_init = self.rate - self.sample_theta = hamiltorch.sample(log_prob_func=func, - params_init=params_init, - num_samples=num_samples, - step_size=step_size, - num_steps_per_sample=num_steps_per_sample) - print(self.sampled_theta) - - def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1): - from stpy.approx_inference.variational_mf import VMF_SGCP - cov_params = [self.kernel_object.kappa, self.kernel_object.gamma] - S_borders = np.array([[-1., 1.]]) - num_inducing_points = self.m - num_integration_points = 256 - X = self.x - - var_mf_sgcp = VMF_SGCP(S_borders, X, cov_params, num_inducing_points, - num_integration_points=num_integration_points, - update_hyperparams=False, output=0, conv_crit=accuracy) - var_mf_sgcp.run() - sample_paths = var_mf_sgcp.sample_posterior(xtest, num_samples=1.) - return sample_paths - - def sample(self, verbose=False, steps=1000, domain=None): - """ - :return: - """ - if self.steps is not None: - steps = self.steps - - if self.stepsize is not None: - stepsize = self.stepsize - else: - stepsize = None - - l, Lambda, u = self.get_constraints() - print("Sampling started.") - if self.rate is None: - self.fit_gp() - - if self.sampling == 'mirror': - self.sample_mirror_langevin(steps=steps, verbose=verbose) - elif self.sampling == 'proximal+prox': - self.sample_proximal_langevin_prox(steps=steps, verbose=verbose) - elif self.sampling == "proximal+simple_prox": - self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose) - elif self.sampling == "hessian": - self.sample_hessian_positive_langevin(steps=steps, verbose=verbose, stepsize=stepsize) - elif self.sampling == "hessian2": - self.sample_hessian_positive_langevin_2(steps=steps, verbose=verbose, stepsize=stepsize) - elif self.sampling == "mla_prime": - self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize) - elif self.sampling == 'hmc': - self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize) - elif self.sampling == 'polyia_variational': - self.sample_variational(accuracy=1. 
/ steps, verbose=verbose) - else: - raise NotImplementedError("Sampling of such is not supported.") - - print("Sampling finished.") - - def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1): - paths = [] - for i in range(samples): - self.sample() - path = self.sample_path_points(xtest).view(1, -1) - paths.append(path) - - paths = torch.cat(paths, dim=0) - lcb = torch.quantile(paths, delta, dim=0) - ucb = torch.quantile(paths, 1 - delta, dim=0) - return lcb, ucb - - def penalized_likelihood_fast(self, threads=4): - l, Lambda, u = self.get_constraints() - Gamma_half, invGamma_half = self.cov(inverse=True) - - if self.dual == False: - # using all points without anchor points - if self.observations is not None: - def objective(theta): - return -torch.sum(torch.log(self.observations @ invGamma_half @ theta)) + torch.sum( - self.phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) - else: - def objective(theta): - return torch.sum(self.phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum( - (invGamma_half @ theta) ** 2) - else: - # using anchor points - mask = self.bucketized_counts > 0 - phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] - - if self.observations is not None: - observations = self.anchor_points_emb - weights = self.anchor_weights - mask = weights > 0. - - def objective(theta): - return -torch.einsum('i,i', weights[mask], - torch.log(observations[mask, :] @ invGamma_half @ theta)) + torch.einsum('i,i', - tau, - phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum( - (invGamma_half @ theta) ** 2) - else: - def objective(theta): - return torch.einsum('i,i', tau, phis @ invGamma_half @ theta) + self.s * 0.5 * torch.sum( - (invGamma_half @ theta) ** 2) - - if self.rate is not None: - theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() - theta0.data = self.rate.data - else: - theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() - - eps = 1e-4 - res = minimize(objective, theta0.numpy(), backend='torch', method='L-BFGS-B', - bounds=(l[0] + eps, u[0]), precision='float64', tol=1e-8, - options={'ftol': 1e-08, - 'gtol': 1e-08, 'eps': 1e-08, - 'maxfun': 15000, 'maxiter': 15000, - 'maxls': 20}) - - self.rate = invGamma_half @ torch.from_numpy(res.x) - print(res.message) - return self.rate - - def penalized_likelihood(self, threads=4): - - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - - Gamma_half = self.cov(inverse=False) - - if self.dual == False: - - # using all points without anchor points - phis = self.phis.numpy() - if self.observations is not None: - observations = self.observations.numpy() - objective = cp.Minimize(-cp.sum(cp.log(observations @ theta)) + - cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)) - else: - objective = cp.Minimize(cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta)) - - else: - - # using anchor points - mask = self.bucketized_counts.clone().numpy() > 0 - phis = self.varphis[mask, :].clone().numpy() - tau = self.total_bucketized_time[mask].clone().numpy() - - if self.observations is not None: - observations = self.anchor_points_emb.numpy() - weights = self.anchor_weights.numpy() - mask = weights > 0. 
- objective = cp.Minimize(-cp.sum(cp.multiply(weights[mask], cp.log(observations[mask, :] @ theta))) + - cp.sum(cp.multiply(tau, phis @ theta)) + self.s * 0.5 * cp.sum_squares(theta)) - else: - objective = cp.Minimize(cp.sum(cp.multiply(tau, phis @ theta)) + self.s * 0.5 * cp.sum_squares(theta)) - - constraints = [] - - Lambda = Lambda @ Gamma_half.numpy() - - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - prob = cp.Problem(objective, constraints) - - if self.rate is not None: - theta.value = self.rate.numpy() - - try: - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - - self.rate = torch.from_numpy(theta.value) - return self.rate - except: - print("Optimization failed. Using the old value.") - print(prob.status) - return self.rate - - def penalized_likelihood_integral(self, threads=4): - - phis = self.phis.numpy() - counts = self.counts.numpy() - - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - objective = cp.Minimize(-cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta) - + self.s * 0.5 * cp.sum_squares(theta)) - - constraints = [] - Lambda = Lambda @ Gamma_half - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - # if self.rate is not None: - # theta.value = self.rate.numpy() - try: - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - self.rate = torch.from_numpy(theta.value) - except: - print("Optimization failed. Using the old value.") - print(prob.status) - - return self.rate - - def bucketization(self): - - phis = [] - observations = [] - - # project sets to smallest forms, and then sum on those only - basic_sets = self.basic_sets - - data_basic = [[] for _ in range(len(basic_sets))] - sensing_times = [[] for _ in range(len(basic_sets))] - counts = torch.zeros(len(basic_sets)).int() - total_data = 0. 
- self.total_bucketized_obs = torch.zeros(size=(len(basic_sets), 1)).double().view(-1) - self.total_bucketized_time = torch.zeros(size=(len(basic_sets), 1)).double().view(-1) - - for sample in self.data: - S, obs, dt = sample - if obs is not None: - total_data = total_data + obs.size()[0] # total counts - for index, elementary in enumerate(basic_sets): # iterate over basic sets - mask = elementary.is_inside(obs) # mask which belong to the elementary - if S.inside(elementary) == True: - data_basic[index].append(obs[mask]) - counts[index] += 1 - sensing_times[index].append(dt) - else: - for index, elementary in enumerate(basic_sets): - if S.inside(elementary) == True: - data_basic[index].append(torch.Tensor([])) - counts[index] += 1 - sensing_times[index].append(dt) - - for index, elementary in enumerate(basic_sets): - arr = np.array([int(elem.size()[0]) for elem in data_basic[index]]) # counts over sensing rounds - phi = self.packing.integral(elementary) # * counts[index] - - self.total_bucketized_obs[index] = float(np.sum(arr)) - self.total_bucketized_time[index] = float(np.sum(sensing_times[index])) - - observations.append(arr) - phis.append(phi.view(1, -1)) # construct varphi_B - - self.bucketized_obs = observations.copy() # these are number of counts associated with sensings - self.bucketized_time = sensing_times.copy() # these are times each basic set has been sensed - self.bucketized_counts = counts # these are count each basic set has been sensed - - def variance_correction(self, variance): - - if self.var_cor_on == 1: - - g = lambda B, k, mu: -0.5 * (B ** 2) / ((mu ** 2) * k) - B / (mu * k) + (np.exp(B / (k * mu)) - 1) - gn = lambda k: g(self.U, k, variance) - - from scipy import optimize - k = optimize.bisect(gn, 1, 10000000) - - return k - else: - return 1. 
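# Editor's sketch (illustrative, not part of the patch): the variance_correction
# method shown above picks a correction factor k by bisecting the scalar condition
#   g(B, k, mu) = -0.5*B^2/(mu^2*k) - B/(mu*k) + (exp(B/(k*mu)) - 1) = 0
# in k. A minimal standalone illustration of that root-finding step, with assumed
# example values B = 1.0 (the bound U) and mu = 0.5 (a per-set variance estimate):

import numpy as np
from scipy import optimize

B, mu = 1.0, 0.5  # assumed example values, not taken from the patch

def g(B, k, mu):
    # Bernstein-type correction condition used by variance_correction
    return -0.5 * (B ** 2) / ((mu ** 2) * k) - B / (mu * k) + (np.exp(B / (k * mu)) - 1)

# For these values g(B, 1, mu) > 0 and g tends to 0 from below as k grows,
# so bisection over [1, 1e7] brackets a root.
k = optimize.bisect(lambda k: g(B, k, mu), 1, 10_000_000)
print(k)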
- - def least_squares_weighted(self, threads=4): - - # if self.approx_fit == False: - # self.bucketization() - - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - phis = self.varphis[mask, :].clone().numpy() - tau = self.total_bucketized_time.clone().numpy() - - variances = self.variances.view(-1).clone().numpy() - - for i in range(variances.shape[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i]) - - selected_variances = variances[mask] - objective = cp.Minimize( - cp.sum_squares((cp.multiply((phis @ theta), tau[mask]) - observations) / (np.sqrt(selected_variances))) - + 0.5 * self.s * cp.norm2(theta) ** 2) - - constraints = [] - Lambda = Lambda @ Gamma_half - # constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - prob = cp.Problem(objective, constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-4, - mosek.dparam.intpnt_co_tol_dfeas: 1e-4, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-4}) - print(prob.status) - self.rate = torch.from_numpy(theta.value) - return self.rate - - def least_sqaures_weighted_fast(self, threads=4): - - l, Lambda, u = self.get_constraints() - Gamma_half, invGamma_half = self.cov(inverse=True) - - mask = self.bucketized_counts > 0 - observations = self.total_bucketized_obs[mask] - phis = self.varphis[mask, :] - tau = self.total_bucketized_time - - variances = self.variances.view(-1) - for i in range(variances.size()[0]): - if mask[i] > 0: - variances[i] = variances[i] * tau[i] * self.variance_correction(variances[i] * tau[i]) - selected_variances = variances[mask] - - def objective(theta): - return torch.sum( - ((tau[mask] * (phis @ invGamma_half @ theta) - observations) / (np.sqrt(selected_variances))) ** 2) \ - + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) - - if self.rate is not None: - theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() - theta0.data = Gamma_half @ self.rate.data - else: - theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() - - eps = 1e-4 - res = minimize(objective, theta0.numpy(), backend='torch', method='L-BFGS-B', - bounds=(l[0] + eps, u[0]), precision='float64', tol=1e-8, - options={'ftol': 1e-06, - 'gtol': 1e-06, 'eps': 1e-08, - 'maxfun': 15000, 'maxiter': 15000, - 'maxls': 20}) - self.rate = invGamma_half @ torch.from_numpy(res.x) - - return self.rate - - def least_squares_weighted_integral(self, threads=4): - - # if self.approx_fit == False: - # self.bucketization() - - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - - phis = self.phis.clone().numpy() # integrated actions - if self.rate is None: - rate = torch.pinverse(torch.from_numpy(Gamma_half)) @ torch.from_numpy(u) - else: - rate = self.rate.clone() - - if len(self.variances_histogram) > 0: - variances = self.variances_histogram.numpy() - - for i in range(variances.shape[0]): - variances[i] = variances[i] * self.variance_correction(variances[i]) - else: - variances = np.zeros(len(self.data)) - i = 0 - for S, obs, dt in self.data: - variances[i] = S.volume() * self.B - variances[i] = variances[i] * self.variance_correction(variances[i]) - i = i + 1 - - 
observations = self.counts.clone().numpy() - - objective = cp.Minimize(cp.sum_squares((phis @ theta - observations) / np.sqrt(variances)) - + self.s * cp.sum_squares(theta)) - constraints = [] - Lambda = Lambda @ Gamma_half - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - prob = cp.Problem(objective, constraints) - - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False, - mosek_params={mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.dual, - mosek.dparam.intpnt_co_tol_pfeas: 1e-6, - mosek.dparam.intpnt_co_tol_dfeas: 1e-6, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-6}) - - self.rate = torch.from_numpy(theta.value) - - return self.rate - - def penalized_likelihood_bins(self, threads=4): - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - - mask = self.bucketized_counts.clone().numpy() > 0 - observations = self.total_bucketized_obs[mask].clone().numpy() - phis = self.varphis[mask, :].clone().numpy() - tau = self.total_bucketized_time[mask].clone().numpy() - - constraints = [] - Lambda = Lambda @ Gamma_half - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - objective = cp.Minimize( - -cp.sum(observations @ cp.log(cp.multiply(tau, phis @ theta))) + cp.sum(cp.multiply(phis @ theta, tau)) - + self.s * 0.5 * cp.sum_squares(theta)) - prob = cp.Problem(objective, constraints) - try: - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - - self.rate = torch.from_numpy(theta.value) - except: - print("optimization failed.") - return self.rate - - def penalized_likelihood_integral_bins(self, threads=4): - phis = self.phis.numpy() - counts = self.counts.numpy() - - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - objective = cp.Minimize(-cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta) - + self.s * 0.5 * cp.sum_squares(theta)) - - constraints = [] - Lambda = Lambda @ Gamma_half - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - try: - if constraints: - prob = cp.Problem(objective, constraints) - else: - prob = cp.Problem(objective) - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - self.rate = torch.from_numpy(theta.value) - except: - print("Optimization failed. 
Using the old value.") - - return self.rate - - def update_variances(self, value=False, force=False): - self.approx_fit = True - if (self.feedback == "count-record" and self.estimator=="least-sq") or force == True: - print("updating variance") - for index, set in enumerate(self.basic_sets): - if value == False: - ucb = self.ucb(set) - self.variances[index] = np.minimum(ucb, self.variances[index]) - else: - self.variances[index] = self.mean_set(set) - else: - if self.data is not None: - if self.peeking == True: - new_var = [] - for S, _, dt in self.data: - new_var.append(float(self.ucb(S)) * dt) - self.variances_histogram = torch.Tensor(new_var.copy()).double() - else: - last = self.data[-1] - new_var = torch.Tensor([self.ucb(last[0]) * last[2]]).double() - if len(self.variances_histogram) > 0: - self.variances_histogram = torch.cat((self.variances_histogram, new_var)) - else: - self.variances_histogram = new_var - self.approx_fit = False - - def ucb(self, S, dt=1., delta=0.5): - - if self.data is None or self.rate is None: - return self.B * S.volume() * dt - - if self.approx == None: - - if self.uncertainty == "laplace": - return self.mean_var_laplace_set(S, dt=dt, beta=self.beta(0))[1] - - elif self.uncertainty == "least-sq": - return self.mean_var_reg_set(S, dt=dt, beta=self.beta(0))[1] - - elif self.uncertainty == "bins": - return self.mean_var_bins_set(S, dt=dt, beta=self.beta(0))[1] - - elif self.uncertainty == "likelihood-ratio": - return self.mean_var_ratio_set(S, dt=dt, beta=self.beta(0))[1] - - elif self.uncertainty == "conformal": - return self.mean_var_conformal_set(S, dt=dt, delta=delta)[2] - - else: - raise AssertionError("Not Implemented.") - - elif self.approx == "ellipsoid": - - if self.approx_fit == False: - self.fit_ellipsoid_approx() - self.beta(0) - print("Fitting Approximation.") - self.approx_fit = True - return self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))[2] - else: - raise AssertionError("Not implemented.") - - def mean_std_per_action(self, S, W, dt, beta): - - phi = self.packing.integral(S) * dt - Gamma_half = self.cov().numpy() - - l, Lambda, u = self.get_constraints() - - Lambda = Lambda @ Gamma_half - ucb, _ = maximize_on_elliptical_slice(phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) - lcb, _ = maximize_on_elliptical_slice(-phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) - map = phi @ self.rate - - return map, float(ucb), -float(lcb) - - def mean_var_laplace_set(self, S, dt, beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_laplace() - self.approx_fit = True - return self.mean_std_per_action(S, self.W, dt, beta) - - def mean_var_reg_set(self, S, dt, beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_regression() - self.approx_fit = True - return self.mean_std_per_action(S, self.W, dt, beta) - - def mean_var_bins_set(self, S, dt, beta=2.): - if self.approx_fit == False: - self.W = self.construct_covariance_matrix_bins() - self.approx_fit = True - return self.mean_std_per_action(S, self.W, dt, beta) - - def mean_var_ratio_set(self, S, dt, beta=2.): - x = self.packing.integral(S) * dt - map = x @ self.rate - # v = np.log(1. / 0.1) - torch.sum(self.counts.double() @ torch.log(self.phis.double() @ self.rate)) \ - # + torch.sum(self.phis.double() @ self.rate) + 0.5 * self.s * torch.norm(self.rate) ** 2 - v = np.log(1. 
/ 0.1) + self.likelihood + 0.5 * self.s * torch.norm(self.rate) ** 2 - - phis = self.phis.numpy() - counts = self.counts.numpy() - theta = cp.Variable(self.get_m()) - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - - objective_min = cp.Minimize(x @ theta) - objective_max = cp.Maximize(x @ theta) - - constraints = [] - Lambda = Lambda @ Gamma_half - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - constraints.append( - -cp.sum(counts @ cp.log(phis @ theta)) + cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares( - theta) <= v) - - prob = cp.Problem(objective_min, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) - lcb = np.dot(theta.value, x) - prob = cp.Problem(objective_max, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) - ucb = np.dot(theta.value, x) - - return map, ucb, lcb - - def map_lcb_ucb_approx_action(self, S, dt=1., beta=2.): - phi = self.packing.integral(S) - map = dt * phi @ self.rate - - ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # ucb = np.minimum(dt * ucb, self.B * S.volume() * dt) - - lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) - # lcb = np.maximum(dt * lcb, self.b * S.volume() * dt) - return map, lcb, ucb - - def fit_ellipsoid_approx(self): - - if self.uncertainty == "laplace": - self.W = self.construct_covariance_matrix_laplace() - elif self.uncertainty == 'least-sq': - self.W = self.construct_covariance_matrix_regression() - elif self.uncertainty == 'bins': - self.W = self.construct_covariance_matrix_bins() - else: - raise AssertionError("Not implemented.") - - self.W_inv_approx = torch.pinverse(self.W) - - def construct_covariance_matrix(self): - if self.estimator == "likelihood": - self.W = self.construct_covariance_matrix_laplace() - elif self.estimator == "least-sq": - self.W = self.construct_covariance_matrix_regression() - elif self.estimator == "bins": - self.W = self.construct_covariance_matrix_bins() - else: - raise NotImplementedError("This estimator is not implemented.") - return self.W - - def construct_covariance_matrix_laplace(self, theta=None): - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.feedback == "count-record": - - if self.observations is not None: - - if theta is None: - D = torch.diag(1. / ((self.observations @ self.rate).view(-1) ** 2)) - W = self.observations.T @ D @ self.observations - else: - D = torch.diag(1. 
/ ((self.observations @ theta).view(-1) ** 2)) - W = self.observations.T @ D @ self.observations - - elif self.feedback == "histogram": - # D = torch.diag(self.counts / (self.phis @ self.rate).view(-1) ** 2) - if len(self.variances_histogram) > 0: - variances = self.variances_histogram.view(-1).clone() - - for i in range(variances.shape[0]): - variances[i] = variances[i] * self.variance_correction(variances[i]) - - D = torch.diag(self.counts / variances ** 2) - - W = self.phis.T @ D @ self.phis - else: - raise AssertionError("Not implemented.") - - return W + torch.eye(self.get_m()).double() * self.s - - def construct_covariance_matrix_regression(self): - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - if self.data is not None: - variances = self.variances - if self.feedback == "count-record": - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - for index_o, o in enumerate(self.bucketized_obs): - n = mask[index_o] - if n > 0: - A = self.varphis[index_o, :].view(-1, 1) @ self.varphis[index_o, :].view(1, -1) * tau[index_o] - k = self.variance_correction(tau[index_o] * variances[index_o]) - W = W + A / (variances[index_o] * k) - - elif self.feedback == "histogram": - - if len(self.variances_histogram) > 0: - variances = self.variances_histogram.view(-1).clone() - - for i in range(variances.shape[0]): - variances[i] = variances[i] * self.variance_correction(variances[i]) - - D = torch.diag(1. / variances) - W = self.phis.T @ D @ self.phis - - return W + torch.eye(self.get_m()).double() * self.s - - def construct_covariance_matrix_bins(self): - W = torch.zeros(size=(self.get_m(), self.get_m())).double() - - if self.feedback == "count-record": - - mask = self.bucketized_counts > 0 - tau = self.total_bucketized_time - varphis = self.varphis[mask, :] - variances = self.variances.view(-1).clone() - - for i in range(variances.size()[0]): - if mask[i] > 0: - variances[i] = variances[i] * self.variance_correction(variances[i] * tau[i]) - - variances = variances[mask] - tau = tau[mask] - - if self.observations is not None: - D = torch.diag(tau / variances) - W = varphis.T @ D @ varphis - - elif self.feedback == "histogram": - - if len(self.variances_histogram) > 0: - variances = self.variances_histogram.view(-1).clone() - - for i in range(variances.shape[0]): - variances[i] = variances[i] * self.variance_correction(variances[i]) - - D = torch.diag(1. 
/ variances) - W = self.phis.T @ D @ self.phis - else: - raise AssertionError("Not implemented.") - - return W + torch.eye(self.get_m()).double() * self.s - - def gap(self, S, actions, w, dt, beta=2.): - """ - Estimates the gap of an action S, - :param S: - :param dt: - :return: - """ - phi = self.packing.integral(S) * dt - Gamma_half = self.packing.cov().numpy() - - if self.approx is None: - l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half - ucbs = [] - for action in actions: - phi_a = self.packing.integral(action) * dt - # ucb, _ = maximize_on_elliptical_slice(phi_a.numpy()-phi.numpy(), self.W.numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) - ucb, _ = maximize_on_elliptical_slice(phi.numpy(), self.W.numpy(), - self.rate.view(-1).numpy(), beta, l, Lambda, u) - ucbs.append(float(ucb)) - gap = torch.max(torch.Tensor(ucbs)) - - else: - if self.data is None: - return (self.B - self.b) * S.volume() - - if self.ucb_identified == False: - print("Recomputing UCB.....") - self.ucb_identified = True - self.fit_ellipsoid_approx() - self.max_ucb = -1000 - self.ucb_action = None - - for action in actions: - _, __, ucb = self.map_lcb_ucb_approx_action(action, dt=dt, beta=self.beta(0)) - ucb = ucb / w(action) - - if ucb > self.max_ucb: - self.max_ucb = ucb - self.ucb_action = action - - map, lcb, ucb = self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0)) - gap = w(S) * self.max_ucb - lcb - return gap - - def information(self, S, dt, precomputed=None): - - if self.data is None: - return 1. - - if self.W is None: - self.construct_covariance_matrix() - - if self.feedback == "count-record": - varphi_UCB = self.packing.integral(self.ucb_action).view(1, -1) * dt - - if precomputed is not None: - Upsilon = precomputed[S] * dt - else: - ind = [] - for index, set in enumerate(self.basic_sets): - if S.inside(set): - ind.append(index) - Upsilon = self.varphis[ind, :] * dt - - I = torch.eye(Upsilon.size()[0]).double() - G = self.W_inv_approx - self.W_inv_approx @ Upsilon.T @ torch.inverse( - I + Upsilon @ Upsilon.T) @ Upsilon @ self.W_inv_approx - return 10e-4 + torch.logdet(varphi_UCB @ self.W_inv_approx @ varphi_UCB.T) - torch.logdet( - varphi_UCB @ G @ varphi_UCB.T) - - elif self.feedback == "histogram": - - return torch.log(1 + self.packing.integral(S) @ self.W_inv_approx @ self.packing.integral(S) * dt ** 2) - - def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): - xtest = S.return_discretization(n) - if self.data is None: - return self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.B + 0 * xtest[:, 0].view(-1, 1) - - self.fit_ellipsoid_approx() - self.fit_ellipsoid_approx() - - Phi = self.packing.embed(xtest).double() - map = Phi @ self.rate - N = Phi.size()[0] - - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :].view(-1, 1) - ucb[i, 0] = np.minimum(map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.B) - lcb[i, 0] = np.maximum(map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.b) - return map, lcb, ucb - - def map_lcb_ucb(self, S, n, beta=2.0): - """ - Calculate exact confidence using laplace approximation on a whole set domain - :param S: set - :param n: discretization - :param beta: beta - :return: - """ - - xtest = S.return_discretization(n) - if self.data is None: - return self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.B + 0 * xtest[:, 0].view(-1, 1) - - N = xtest.size()[0] - Phi = 
self.packing.embed(xtest) - map = Phi @ self.rate - - if self.uncertainty == "laplace": - W = self.construct_covariance_matrix_laplace() - elif self.uncertainty == "least-sq": - W = self.construct_covariance_matrix_regression() - elif self.uncertainty == "bins": - W = self.construct_covariance_matrix_bins() - else: - raise AssertionError("Not implemented ") - - Gamma_half = self.cov().numpy() - l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - for i in range(N): - x = Phi[i, :] - ucbi, _ = maximize_on_elliptical_slice(x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta), l, - Lambda, - u) - lcbi, _ = maximize_on_elliptical_slice(-x.numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta), - l, Lambda, - u) - ucb[i, 0] = ucbi - lcb[i, 0] = -lcbi - - return map, lcb, ucb - - def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False): - xtest = S.return_discretization(n) - - if self.data is None: - return self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.b + 0 * xtest[:, 0].view(-1, 1), \ - self.B + 0 * xtest[:, 0].view(-1, 1) - - N = xtest.size()[0] - Phi = self.packing.embed(xtest) - map = Phi @ self.rate - - ucb = torch.zeros(size=(N, 1)).double() - lcb = torch.zeros(size=(N, 1)).double() - - phis = self.phis.numpy() - - if current: - if self.observations is not None: - v = np.log(1. / delta) - torch.sum(torch.log(self.observations @ self.rate)) + torch.sum( - self.phis @ self.rate) + self.s * 0.5 * torch.sum(self.rate ** 2) - else: - v = np.log(1. / delta) + torch.sum( - self.phis @ self.rate) + self.s * 0.5 * torch.sum(self.rate ** 2) - else: - if self.feedback == 'count-record': - v = np.log(1. / delta) + self.loglikelihood + 0.5 * self.s * torch.sum(self.rate ** 2) - elif self.feedback == 'histogram': - v = np.log(1. 
/ delta) + self.loglikelihood + 0.5 * self.s * torch.sum(self.rate ** 2) - else: - raise NotImplementedError("Not compatible with given feedback model ") - - l, Lambda, u = self.get_constraints() - Gamma_half = self.cov().numpy() - Lambda = Lambda @ Gamma_half - - for i in range(N): - x = Phi[i, :].numpy() - - theta = cp.Variable(self.get_m()) - - objective_min = cp.Minimize(x @ theta) - objective_max = cp.Maximize(x @ theta) - - constraints = [] - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) - - if self.feedback == 'count-record': - if self.observations is not None: - observations = self.observations.numpy() - - constraints.append( - -cp.sum(cp.log(observations @ theta)) + - cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) - <= v) - else: - constraints.append(cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) - <= v) - - elif self.feedback == 'histogram': - constraints.append( - -cp.sum(cp.log(phis @ theta)) + - cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) - <= v) - else: - raise NotImplementedError("Does not exist.") - - prob = cp.Problem(objective_min, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) - lcb[i, 0] = float(np.dot(theta.value, x)) - - prob = cp.Problem(objective_max, constraints) - prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) - ucb[i, 0] = float(np.dot(theta.value, x)) - - return map, lcb, ucb - - def mean_var_conformal_set(self, S, dt, beta=2., max_val=None, delta=0.05): - # self.bucketize_prepare() - if max_val is None: - max_val = int(self.B * self.basic_sets[0].volume() * dt) + 1 - map, lcb, ucb = self.conformal_confidence_set(S, delta=delta, max_val=max_val, dt=dt) - return map, lcb, ucb - - def conformal_score_func(self, theta, new, index): - - if new[1] is None: - n_new = 0 - else: - n_new = new[1].size()[0] - - varphi = self.packing.integral(new[0]) * new[2] - err_new = abs(float(n_new) - float(varphi @ theta)) - n = len(self.bucketized_obs[index]) - - if n > 0: - - phis = self.varphis[index].repeat(n, 1) - res = torch.Tensor(self.bucketized_obs[index]).double() - - err = torch.abs(res - (phis @ theta.view(-1, 1)).view(-1)) - - return torch.sum(err < err_new).double() / float(n + 1.) + 1. / (float(n) + 1.) - - else: - return 0. - - def conformal_confidence(self, delta=0.05, max_val=20, dt=1, step=1): - lcb = [] - ucb = [] - map = [] - - if self.data is not None: - self.bucketization(time=True) - - for S in self.basic_sets: - m, u, l = self.conformal_confidence_set(S, delta=delta, max_val=max_val, dt=dt, step=step) - - map.append(m) - ucb.append(u) - lcb.append(l) - - return torch.Tensor(map).double(), torch.Tensor(ucb).double(), torch.Tensor(lcb).double() - - def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1., step=1): - """ - :return: (lcb,ucb) - """ - - if self.data is not None: - if self.feedback == "count-record": - self.penalized_likelihood() - elif self.feedback == "histogram": - self.penalized_likelihood_integral() - - # identify the set in basic sets - index = 0 - for set in self.basic_sets: - if set.inside(S): - break - index += 1 - - # calculate map estimate - map = float(self.rate @ self.packing.integral(S)) - else: - map = self.b - return map, self.B, self.b - - scores = [] - j = 0 - score = 1. 
- lowest = 0 - n = float(len(self.bucketized_obs[index])) - - while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j <= max_val: - lowest = j - if j > 0: - obs = torch.zeros(size=(j, self.d)).double() - for i in range(self.d): - obs[:, i] = torch.from_numpy(np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j)) - else: - obs = None - - # new observation - new = (S, obs, dt) - - old_phis, old_observations, old_counts = self.add_data_point_and_remove(new) - - if self.feedback == "count-record": - theta_new = self.penalized_likelihood() - elif self.feedback == "histogram": - theta_new = self.penalized_likelihood_integral() - - # restore back the data - self.phis = old_phis - self.observations = old_observations - self.counts = old_counts - - # calculate the score - score = self.conformal_score_func(theta_new, new, index) - n = float(len(self.bucketized_obs[index])) - - print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1)) - j = j + 1 - - j = max_val - score = 1. - largest = max_val - - while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j > lowest: - largest = j - if j > 0: - obs = torch.zeros(size=(j, self.d)).double() - for i in range(self.d): - obs[:, i] = torch.from_numpy(np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j)) - else: - obs = None - - # new observation - new = (S, obs, dt) - - old_phis, old_observations, old_counts = self.add_data_point_and_remove(new) - - if self.feedback == "count-record": - theta_new = self.penalized_likelihood() - elif self.feedback == "histogram": - theta_new = self.penalized_likelihood_integral() - - # restore back the data - self.phis = old_phis - self.observations = old_observations - self.counts = old_counts - - # calculate the score - score = self.conformal_score_func(theta_new, new, index) - n = float(len(self.bucketized_obs[index])) - - print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1)) - j = j - 1 - # scores = np.array(scores) - # mask = scores < np.ceil((1-delta)*(n+1))/(n+1) - - # if np.sum(mask) == 0: - # lowest = 0 - # largest = max_val - # else: - # lowest = np.min(np.arange(0,max_val,step)[mask]) - # largest = np.max(np.arange(0, max_val, step)[mask]) - - lcb = lowest / dt / S.volume() - ucb = largest / dt / S.volume() - - return (map, ucb, lcb) + def __init__( + self, + process, + hierarchy, + d=1, + m=100, + kernel_object=None, + B=1.0, + s=1.0, + jitter=10e-8, + b=0.0, + basis="triangle", + estimator="likelihood", + feedback="count-record", + offset=0.1, + uncertainty="laplace", + approx=None, + stepsize=None, + embedding=None, + beta=2.0, + sampling="proximal+prox", + peeking=True, + constraints=True, + var_cor_on=True, + samples_nystrom=15000, + inverted_constraint=False, + steps=None, + dual=True, + no_anchor_points=1024, + U=1.0, + opt="torch", + ): + + self.process = process + self.d = d + self.s = s + self.b = b + self.B = B + self.U = U + self.stepsize = stepsize + self.sampling = sampling + self.steps = steps + self.opt = opt + self.kernel_object = kernel_object + # set hierarchy + self.constraints = constraints + self.hierarchy = hierarchy + self.ucb_identified = False + self.inverted_constraint = inverted_constraint + # approximation + self.loglikelihood = 0.0 + self.dual = dual + self.peeking = peeking + self.no_anchor_points = no_anchor_points + if beta < 0.0: + self.beta = lambda t: self.beta_theory() + else: + self.beta = lambda t: beta + self.var_cor_on = var_cor_on + + if basis == "triangle": + self.packing = TriangleEmbedding( + d, + m, + 
kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + ) + elif basis == "bernstein": + self.packing = BernsteinEmbedding( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + ) + elif basis == "splines": + self.packing = BernsteinSplinesEmbedding( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + ) + elif basis == "nystrom": + self.packing = PositiveNystromEmbeddingBump( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + samples=samples_nystrom, + ) + elif basis == "overlap-splines": + self.packing = BernsteinSplinesOverlapping( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + ) + elif basis == "faber": + self.packing = FaberSchauderEmbedding( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + ) + elif basis == "optimal-positive": + self.packing = OptimalPositiveBasis( + d, + m, + kernel_object=kernel_object, + B=B, + b=b, + offset=offset, + s=np.sqrt(jitter), + samples=samples_nystrom, + ) + elif basis == "custom": + self.packing = embedding + else: + raise NotImplementedError("The request positive basis is not implemented.") + self.m = m + self.data = None + self.covariance = False + + # stabilizing the matrix inversion + self.jitter = jitter + + # for variance stabilization + self.stabilization = None + self.approx_fit = False + + # properties of rate estimator + self.estimator = estimator + self.feedback = feedback + self.uncertainty = uncertainty + self.approx = approx + + # precompute information + self.basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) + + self.varphis = torch.zeros(size=(len(self.basic_sets), self.get_m())).double() + self.variances = torch.ones(size=(len(self.basic_sets), 1)).double().view(-1) + self.variances_histogram = [] + self.observations = None + self.rate = None + self.W = (s) * torch.eye(self.get_m()).double() + self.W_inv_approx = (1.0 / s) * torch.eye(self.get_m()).double() + self.beta_value = 2.0 + self.sampled_theta = None + + if self.dual == True: + if self.d == 1: + anchor = no_anchor_points + self.anchor_points = self.hierarchy.top_node.return_discretization( + anchor + ) + self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1) + elif self.d == 2: + anchor = no_anchor_points + self.anchor_points = self.hierarchy.top_node.return_discretization( + int(np.sqrt(anchor)) + ) + self.anchor_weights = torch.zeros(size=(anchor, 1)).double().view(-1) + self.global_dt = 0.0 + self.anchor_points_emb = self.packing.embed(self.anchor_points) + + if feedback == "count-record" and basis != "custom": + print("Precomputing phis.") + for index_set, set in enumerate(self.basic_sets): + self.varphis[index_set, :] = self.packing.integral(set) + self.variances[index_set] = set.volume() * self.B + else: + pass + + print("Precomputation finished.") + + def add_data_point(self, new_data, times=True): + + super().add_data_point(new_data, times=times) + + if self.rate is not None: + rate = self.rate + else: + l, _, u = self.get_constraints() + Gamma_half = self.cov() + rate = Gamma_half @ u + + if self.feedback == "histogram": + val = self.packing.integral(new_data[0]) @ rate * new_data[2] + v = -np.log(val) + val + + elif self.feedback == "count-record": + v = self.packing.integral(new_data[0]) @ rate * new_data[2] + if new_data[1] is not None: + val2 = self.packing.embed(new_data[1]) @ rate * 
new_data[2] + v = v - torch.sum(np.log(val2)) + + self.loglikelihood += v + + def beta_theory(self): + if self.approx_fit == False: + l, Lambda, u = self.get_constraints() + Gamma_half, invGamma_half = self.cov(inverse=True) + + ## norm + norm = self.s + + ## constraints + eps = 10e-3 + res = Gamma_half @ self.rate.view(-1, 1) - torch.from_numpy(l).view(-1, 1) + xi = res.clone() + xi[res > eps] = 0.0 + + constraint = xi.T @ Gamma_half @ self.W_inv_approx @ Gamma_half.T @ xi + + ## concentration + vol = ( + 4 * np.log(1.0 / 0.1) + + torch.logdet(self.W) + - self.get_m() * np.log(self.s) + ) + self.beta_value = np.sqrt(norm + vol + constraint) + print("-------------------") + print("New beta:", self.beta_value) + print("norm:", norm) + print("constraint:", constraint) + print("vol:", vol) + print("-------------------") + else: + pass + return self.beta_value + + def get_constraints(self): + return self.packing.get_constraints() + + def cov(self, inverse=False): + return self.packing.cov(inverse=inverse) + + def fit_gp(self, threads=4): + + if self.data is not None: + if self.feedback == "count-record": + + if self.estimator == "likelihood": + if self.opt == "cvxpy": + self.penalized_likelihood(threads=threads) + elif self.opt == "torch": + self.penalized_likelihood_fast(threads=threads) + else: + raise NotImplementedError( + "The optimization method does not exist" + ) + + elif self.estimator == "least-sq": + self.least_squares_weighted() + + elif self.estimator == "bins": + self.penalized_likelihood_bins() + + else: + raise AssertionError("wrong name.") + + elif self.feedback == "histogram": + + if self.estimator == "likelihood": + self.penalized_likelihood_integral() + + elif self.estimator == "least-sq": + self.least_squares_weighted_integral() + + elif self.estimator == "bins": + self.penalized_likelihood_integral_bins() + + else: + raise AssertionError("wrong name.") + else: + raise AssertionError("wrong name.") + else: + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov() + self.rate = l + + def sample_mirror_langevin(self, steps=500, verbose=False): + + l, Lambda, u = self.get_constraints() + Gamma_half, invGamma_half = self.cov(inverse=True) + + v = torch.from_numpy((u + l) / 2.0).view(-1, 1) + S = torch.diag(torch.from_numpy(u - l).view(-1) / 2.0).double() + + phis = self.phis.clone() @ invGamma_half + + if self.observations is not None: + obs = self.observations @ invGamma_half + else: + obs = None + + invGamma = invGamma_half.T @ invGamma_half + transform = lambda y: S @ torch.tanh(y) + v + + if self.feedback == "count-record" and self.dual == False: + if obs is not None: + func = ( + lambda y: -torch.sum(torch.log(obs @ transform(y)).view(-1)) + + torch.sum(phis @ transform(y)) + + self.s * transform(y).T @ invGamma @ transform(y) + + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2))) + ) + else: + func = ( + lambda y: torch.sum(phis @ transform(y)) + + self.s * transform(y).T @ invGamma @ transform(y) + + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2))) + ) # torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) + + elif self.feedback == "count-record" and self.dual == True: + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] @ invGamma_half + tau = self.total_bucketized_time[mask] + + if obs is not None: + obs = self.anchor_points_emb @ invGamma_half + weights = self.anchor_weights + mask = weights > 0.0 + + func = ( + lambda y: -torch.sum( + weights[mask].view(-1, 1) + * torch.log(obs[mask, :] @ transform(y)) + ) + + torch.sum(tau.view(-1, 1) * 
(phis @ transform(y))) + + self.s * transform(y).T @ invGamma @ transform(y) + + torch.sum(torch.log(1.0 / (1.0 - (transform(y) ** 2)))) + ) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) + else: + func = ( + lambda y: torch.sum(tau.view(-1, 1) * (phis @ transform(y))) + + self.s * transform(y).T @ invGamma @ transform(y) + + torch.sum(torch.log(1.0 / (1.0 - transform(y) ** 2))) + ) # + torch.sum(torch.log(0.5*(1.+torch.cosh(2*y)))) + + elif self.feedback == "histogram": + func = ( + lambda y: -torch.sum( + self.counts.clone().view(-1) + * torch.log(phis @ (S @ torch.tanh(y) + v)).view(-1) + ) + + torch.sum(phis @ (S @ torch.tanh(y) + v)) + + self.s + * (S @ torch.tanh(y) + v).T + @ invGamma + @ (S @ torch.tanh(y) + v) + ) + + y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True) + + # initiallize with map sqeezed more + y.data = Gamma_half @ self.rate.view(-1, 1) # u < theta < l + + u_new = u + 0.01 + l_new = l - 0.01 + v2 = torch.from_numpy((u_new + l_new) / 2.0).view(-1, 1) + S2 = torch.diag(torch.from_numpy(u_new - l_new).view(-1) / 2.0).double() + # + y.data = torch.inverse(S2) @ (y.data - v2) + y.data = torch.atanh(y.data) + + W = ( + S.T + @ invGamma_half.T + @ self.construct_covariance_matrix_laplace() + @ invGamma_half + @ S + ) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-8 + ) + ) + eta = 0.05 / (L + 1) + + print("Eta:", eta) + + for k in range(steps): + + w = torch.randn(size=(self.get_m(), 1)).double() + nabla_y = torch.autograd.functional.jacobian(func, y).data[0, 0, :, :] + y.data = y.data - eta * nabla_y + np.sqrt(2 * eta) * w + theta = torch.tanh(y).detach() + + if verbose == True: + print("Iter:", k, (S @ theta + v).T) + print(y.T) + + self.sampled_theta = invGamma_half @ transform(y.data) + + def sample_projected_langevin(self, steps=300, verbose=False, stepsize=None): + """ + :param burn_in: + :return: + """ + + Gamma_half = self.packing.cov() + + def prox(x): + z = x.numpy() + theta = cp.Variable((self.get_m(), 1)) + objective = cp.Minimize(cp.sum_squares(z - theta)) + constraints = [] + l, Lambda, u = self.get_constraints() + Lambda = Lambda @ Gamma_half.numpy() + constraints.append(Lambda @ theta >= l.reshape(-1, 1)) + prob = cp.Problem(objective, constraints) + prob.solve( + solver=cp.OSQP, + warm_start=False, + verbose=False, + eps_abs=1e-3, + eps_rel=1e-3, + ) + return torch.from_numpy(theta.value) + + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + 1.0 / (self.observations @ y).view(-1), + self.observations, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda theta: torch.sum(self.phis, dim=0).view( + -1, 1 + ) + self.s * theta.view(-1, 1) + + elif self.feedback == "count-record" and self.dual == True: + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] + tau = self.total_bucketized_time[mask] + + if self.observations is not None: + obs = self.anchor_points_emb + weights = self.anchor_weights + mask = weights > 0.0 + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + weights[mask] / ((obs[mask, :] @ y).view(-1)), + obs[mask], + ).view(-1, 1) + + torch.einsum("i,ij->j", tau, phis).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view( + -1, 1 + ) + self.s * y.view(-1, 1) + + elif self.feedback == "histogram": + nabla = ( + lambda theta: 
-torch.sum( + torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts) + @ self.phis, + dim=0, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + + theta = self.rate.view(-1, 1) + W = self.construct_covariance_matrix_laplace(minimal=True) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + ) + ) + + if stepsize is None: + eta = 0.5 / (L + 1) + else: + eta = np.minimum(1, stepsize * 0.5 / L) + + print(eta) + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() + theta = prox(theta - eta * nabla(theta) + np.sqrt(2 * eta) * w) + + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = theta + + def sample_proximal_langevin_prox(self, steps=300, verbose=False, stepsize=None): + """ + :param burn_in: + :return: + """ + + Gamma_half, invGamma_half = self.packing.cov(inverse=True) + # invGamma = invGamma_half.T @ invGamma_half + l, Lambda, u = self.get_constraints() + Lambda = Lambda @ Gamma_half.numpy() + + def prox(x): + res = solve_qp( + np.eye(self.get_m()), + x.numpy().reshape(-1), + C=Gamma_half.numpy(), + b=l.numpy(), + factorized=True, + ) + return torch.from_numpy(res[0]).view(-1, 1) + + # theta_n = cp.Variable((self.get_m(), 1)) + # x = cp.Parameter((self.get_m(), 1)) + # objective = cp.Minimize(cp.sum_squares(x - theta_n)) + # + # constraints = [] + # l, Lambda, u = self.get_constraints() + # Lambda = Lambda @ Gamma_half.numpy() + # constraints.append(Lambda @ theta_n >= l.reshape(-1, 1)) + # constraints.append(Lambda @ theta_n <= u.reshape(-1, 1)) + # + # prob = cp.Problem(objective, constraints) + + # def prox(x): + # return Gamma_half @ torch.from_numpy(scipy.optimize.nnls(invGamma.numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1) + + if self.data is not None: + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + 1.0 / (self.observations @ y).view(-1), + self.observations, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda theta: torch.sum(self.phis, dim=0).view( + -1, 1 + ) + self.s * theta.view(-1, 1) + + elif self.feedback == "count-record" and self.dual == True: + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] + tau = self.total_bucketized_time[mask] + + if self.observations is not None: + obs = self.anchor_points_emb + weights = self.anchor_weights + mask = weights > 0.0 + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + weights[mask] / ((obs[mask, :] @ y).view(-1)), + obs[mask], + ).view(-1, 1) + + torch.einsum("i,ij->j", tau, phis).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view( + -1, 1 + ) + self.s * y.view(-1, 1) + + elif self.feedback == "histogram": + nabla = ( + lambda theta: -torch.sum( + torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts) + @ self.phis, + dim=0, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + else: + nabla = lambda theta: self.s * theta.view(-1, 1) + + if self.rate is not None: + theta = self.rate.view(-1, 1) + else: + theta = ( + self.b + + 0.05 + * torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False + ).view(-1, 1) + ** 2 + ) + + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() + + # calculate proper step-size + W = 
self.construct_covariance_matrix_laplace(theta=theta) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + if stepsize is not None: + eta = 0.5 * stepsize / L + else: + eta = 0.5 / L + + # prox calculate + # x.value = theta.numpy() + # prob.solve(solver=cp.OSQP, warm_start=True, verbose=False, eps_abs=1e-3, eps_rel=1e-3) + # proximal_theta = torch.from_numpy(theta_n.value) + + # update step + # theta = 0.5 * theta - eta * nabla(theta) + 0.5 * proximal_theta + np.sqrt(2 * eta) * w + + # update step + theta = ( + 0.5 * theta + - eta * nabla(theta) + + 0.5 * prox(theta) + + np.sqrt(2 * eta) * w + ) + if verbose == True: + print("Iter:", k, theta.T) + + self.sampled_theta = prox(theta) + + def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False): + + Gamma_half, invGamma_half = self.packing.cov(inverse=True) + l, Lambda, u = self.get_constraints() + prox_simple = lambda x: torch.minimum( + torch.maximum(x.view(-1), torch.from_numpy(l).view(-1)), + torch.from_numpy(u).view(-1), + ).view(-1, 1) + + def prox(x): + return invGamma_half @ prox_simple(Gamma_half @ x) + + phis = self.phis + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + obs = self.observations + + func = ( + lambda y: -torch.sum(torch.log(obs @ y)) + + torch.sum((phis @ y)) + + self.s * y.T @ y + ) + + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", 1.0 / (obs @ y).view(-1), obs + ).view(-1, 1) + + torch.sum(phis, dim=0).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + func = lambda y: torch.sum(phis @ y).view(-1, 1) + self.s * y.T @ y + + nabla = lambda y: torch.sum(phis, dim=0).view(-1, 1) + self.s * y.view( + -1, 1 + ) + + elif self.feedback == "count-record" and self.dual == True: + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] + tau = self.total_bucketized_time[mask] + + if self.observations is not None: + obs = self.anchor_points_emb + weights = self.anchor_weights + mask = weights > 0.0 + func = ( + lambda y: -torch.sum( + weights[mask].view(-1, 1) * torch.log(obs[mask, :] @ y) + ) + + torch.sum(tau.view(-1, 1) * (phis @ y)) + + self.s * y.T @ y + ) + + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + weights[mask] / ((obs[mask, :] @ y).view(-1)), + obs[mask], + ).view(-1, 1) + + torch.einsum("i,ij->j", tau, phis).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + func = ( + lambda y: torch.sum(tau.view(-1, 1) * (phis @ y)) + self.s * y.T @ y + ) + + nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view( + -1, 1 + ) + self.s * y.view(-1, 1) + + elif self.feedback == "histogram": + func = ( + lambda y: -torch.sum( + self.counts.view(-1) * torch.log(phis @ y).view(-1) + ) + + torch.sum(phis @ y) + + self.s * y.T @ y + ) + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", self.counts.view(-1) / (phis @ y).view(-1), phis + ).view(-1, 1) + + torch.sum(phis, dim=0).view(-1, 1) + + self.s * y + ) + + # hessian = lambda y: self.construct_covariance_matrix_laplace() + + y = prox( + torch.randn(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True) + ) + y.data = self.rate.view(-1, 1) + + W = self.construct_covariance_matrix_laplace() + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + ) + ) + + eta = 0.5 / (L + 1) + + for k in range(steps): + W = torch.randn(size=(self.get_m(), 1)).double() + nabla_y = nabla(y.data) + y.data = ( + (1 - eta) * y.data + - eta * nabla_y + + eta * prox(y.data) + + np.sqrt(2 * 
eta) * W + ) + if verbose == True: + print("Iter:", k, y.T) + print("grad:", y.grad.T) + + self.sampled_theta = prox(y.detach()) + + def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=None): + + if self.data is not None: + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + 1.0 / (self.observations @ y).view(-1), + self.observations, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda theta: torch.sum(self.phis, dim=0).view( + -1, 1 + ) + self.s * theta.view(-1, 1) + + elif self.feedback == "count-record" and self.dual == True: + + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] + tau = self.total_bucketized_time[mask] + + if self.observations is not None: + obs = self.anchor_points_emb + weights = self.anchor_weights + mask = weights > 0.0 + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", + weights[mask] / ((obs[mask, :] @ y).view(-1)), + obs[mask], + ).view(-1, 1) + + torch.einsum("i,ij->j", tau, phis).view(-1, 1) + + self.s * y.view(-1, 1) + ) + else: + nabla = lambda y: torch.einsum("i,ij->j", tau, phis).view( + -1, 1 + ) + self.s * y.view(-1, 1) + + elif self.feedback == "histogram": + nabla = ( + lambda theta: -torch.sum( + torch.diag((1.0 / (self.phis @ theta).view(-1)) * self.counts) + @ self.phis, + dim=0, + ).view(-1, 1) + + torch.sum(self.phis, dim=0).view(-1, 1) + + self.s * theta.view(-1, 1) + ) + else: + nabla = lambda theta: self.s * theta.view(-1, 1) + + Gamma_half = self.packing.cov() + lz, Lambda, u = self.get_constraints() + + Lambda = torch.from_numpy(Lambda) @ Gamma_half + y = ( + self.b + + 0.05 + * torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + ).view(-1) + ** 2 + ) + + if self.rate is not None: + y.data = self.rate.data + Gamma_half @ y.data + else: + y.data = Gamma_half @ y.data + + if verbose == True: + print("initial point") + print(y.data) + + W = self.construct_covariance_matrix_laplace() + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + ) + ) + + if stepsize is None: + eta = 1.0 / (L + 1) + else: + eta = stepsize / (L + 1) + + D = lambda x: torch.diag(1.0 / torch.abs(Lambda @ x).view(-1)) + sqrt_hessian = lambda x: Lambda @ D(x) + + phi = lambda x: -torch.sum(torch.log(Lambda @ x)) + nabla_phi = lambda x: -torch.einsum( + "i,ij->j", 1.0 / (Lambda @ x).view(-1), Lambda + ) + hessian_phi = ( + lambda x: Lambda.T @ torch.diag(1.0 / (Lambda @ x).view(-1) ** 2) @ Lambda + ) + + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() + nabla_val = nabla(y) + H = sqrt_hessian(y.data) + z = ( + nabla_phi(y.data).view(-1, 1) + - eta * nabla_val + + np.sqrt(2 * eta) * H @ w + ) + + # y.data = newton_solve(lambda s: nabla_phi(s).reshape(-1)-z.data.reshape(-1),y.reshape(-1), + # verbose = verbose, grad = hessian_phi).view(-1,1) + + # # minimization appraoch + def objective(s): + return torch.sum((nabla_phi(s).reshape(-1) - z.reshape(-1)) ** 2) + + # # + + # x0 = y.reshape(-1).clone().detach().numpy() + # res = minimize(objective, x0, backend='torch', method='Newton-CG', precision='float64', tol=1e-5, hvp_type='vhp') + # y.data = torch.from_numpy(res.x) + + x0 = y.reshape(-1).clone() + res = minimize_torch(objective, x0, method="newton-cg", tol=1e-5) + y.data = res.x + + if verbose: + print("Iter:", k) + print(y.T) + + self.sampled_theta = y.data + + def 
sample_mla_prime(self, steps=100, verbose=False, stepsize=None): + Gamma_half, invGamma_half = self.packing.cov(inverse=True) + invGamma = invGamma_half.T @ invGamma_half + l, Lambda, u = self.get_constraints() + Lambda = torch.from_numpy(Lambda) @ Gamma_half + + if self.data is not None: + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + observations = self.observations @ invGamma_half + phis = self.phis @ invGamma_half + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", 1.0 / (observations @ y).view(-1), observations + ).view(-1, 1) + + torch.sum(phis, dim=0).view(-1, 1) + + self.s * invGamma @ y.view(-1, 1) + ) + else: + nabla = lambda theta: torch.sum(phis, dim=0).view( + -1, 1 + ) + self.s * invGamma @ theta.view(-1, 1) + + else: + nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1) + + y = ( + self.b + + 0.05 + * torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + ).reshape(-1, 1) + ** 2 + ) + # if self.rate is not None: + # y.data = Gamma_half @ self.rate.data.view(-1,1) + y.data + # else: + y.data = y.data + + if verbose == True: + print("initial point") + print(y.data) + + W = invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + ) + ) + + if stepsize is None: + eta = 1.0 / (L + 1) + else: + eta = stepsize / (L + 1) + + from stpy.approx_inference.sampling_helper import get_increment + + for k in range(steps): + + nabla_val = nabla(y) + + # cvxpy minimization + # x = cp.Variable((self.get_m(), 1)) + # objective = cp.Minimize( eta * nabla_val.detach().numpy().T @ x - cp.sum(cp.log(x)) -(-1./y.data).T@x) + # constraints = [x >= 0.] 
+ # + # prob = cp.Problem(objective, constraints) + # prob.solve(solver = cp.MOSEK) + + w0 = eta * nabla_val.data + 1.0 / y.data + # initial point for the solve + # w0 = -1./( torch.from_numpy(x.value)) + + # simulate + f = lambda w, n: n / torch.abs(w) + w = get_increment(eta, 1000, f, w0, path=False) + + # back mirror map + y.data = -1.0 / w + + if verbose: + print("Iter:", k) + print(y.T) + + self.sampled_theta = invGamma_half @ y.data + + def sample_hessian_positive_langevin_2( + self, steps=500, verbose=False, stepsize=None, preconditioner=True + ): + + Gamma_half, invGamma_half = self.packing.cov(inverse=True) + invGamma = invGamma_half @ invGamma_half + if self.data is not None: + + if self.feedback == "count-record" and self.dual == False: + + observations = self.observations @ invGamma_half + phis = self.phis @ invGamma_half + + if self.observations is not None: + nabla = ( + lambda y: -torch.einsum( + "i,ij->j", 1.0 / (observations @ y).view(-1), observations + ).view(-1, 1) + + torch.sum(phis, dim=0).view(-1, 1) + + self.s * invGamma @ y.view(-1, 1) + ) + else: + nabla = lambda theta: torch.sum(phis, dim=0).view( + -1, 1 + ) + self.s * invGamma @ theta.view(-1, 1) + + else: + nabla = lambda theta: self.s * invGamma @ theta.view(-1, 1) + + y = ( + torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + ).view(-1) + ** 2 + ) + # if self.rate is not None: + # y.data = Gamma_half @ self.rate.data + y.data + + if verbose == True: + print("initial point") + print(y.data) + + W = self.construct_covariance_matrix_laplace(minimal=True) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + ) + ) + + if stepsize is None: + eta = 1.0 / (L + 1) + else: + eta = stepsize / (L + 1) + + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() / torch.abs(y.data).view( + -1, 1 + ) + nabla_val = nabla(y) + z = ( + -1.0 / y.data.view(-1, 1) + + self.b + - eta * Gamma_half @ nabla_val + + np.sqrt(2 * eta) * Gamma_half @ w + ) + y.data = -1.0 / z + self.b + + if verbose: + print("Iter:", k) + print(y.T) + + self.sampled_theta = invGamma_half @ y.data + + def sample_newton_langevin(self, steps=1000, stepsize=None, verbose=False): + Gamma_half, invGamma_half = self.packing.cov(inverse=True) + invGamma = invGamma_half @ invGamma_half + if self.data is not None: + + if self.feedback == "count-record" and self.dual == False: + + observations = self.observations @ invGamma_half + phis = self.phis @ invGamma_half + + if self.observations is not None: + nabla = ( + lambda y, bar: -torch.einsum( + "i,ij->j", 1.0 / (observations @ y).view(-1), observations + ).view(-1, 1) + + torch.sum(phis, dim=0).view(-1, 1) + + self.s * invGamma @ y.view(-1, 1) + - bar * 1.0 / y + ) + else: + nabla = ( + lambda theta, bar: torch.sum(phis, dim=0).view(-1, 1) + + self.s * invGamma @ theta.view(-1, 1) + - bar * 1.0 / theta + ) + + else: + nabla = ( + lambda theta, bar: self.s * invGamma @ theta.view(-1, 1) + - bar * 1.0 / theta + ) + + y = ( + 0.05 + * torch.rand( + size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + ).view(-1, 1) + ** 2 + ) + + barrier = 10.0 + # hessian = lambda theta,bar: torch.einsum('ik,k,kj->ij',observations.T,(observations@theta).view(-1),observations) + invGamma + bar/theta**2 + hessian = ( + lambda theta, bar: observations.T + @ torch.diag(1 / (observations @ theta).view(-1) ** 2) + @ observations + + invGamma + + torch.diag(bar / theta.view(-1) ** 2) + ) + hessian_sqrt = lambda theta, bar: 
torch.cholesky(hessian(theta, bar)) + eta = 1.0 + + for k in range(steps): + w = torch.randn(size=(self.get_m(), 1)).double() + nabla_val = nabla(y, barrier) + y.data = ( + y.data + - torch.linalg.solve(hessian(y.data, barrier), nabla_val) + + np.sqrt(2 * eta) + * torch.linalg.solve(hessian_sqrt(y.data, barrier), w) + ) + + if verbose: + print("Iter:", k) + print(y.T) + + self.sampled_theta = invGamma_half @ y.data + + # self.sampled_theta = y.data + + def sample_hmc(self, steps=1000, stepsize=None, verbose=False): + import hamiltorch + + phis = self.phis + if self.feedback == "count-record" and self.dual == False: + if self.observations is not None: + obs = self.observations + func = ( + lambda y: torch.sum(torch.log(obs @ y)) + - torch.sum((phis @ y)) + - self.s * y.T @ y + ) + else: + func = lambda y: -torch.sum(phis @ y).view(-1, 1) - self.s * y.T @ y + + num_samples = 1 + num_steps_per_sample = steps + if stepsize is None: + step_size = 1e-8 + else: + step_size = stepsize + + params_init = self.rate + self.sample_theta = hamiltorch.sample( + log_prob_func=func, + params_init=params_init, + num_samples=num_samples, + step_size=step_size, + num_steps_per_sample=num_steps_per_sample, + ) + print(self.sampled_theta) + + def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1): + from stpy.approx_inference.variational_mf import VMF_SGCP + + cov_params = [self.kernel_object.kappa, self.kernel_object.gamma] + S_borders = np.array([[-1.0, 1.0]]) + num_inducing_points = self.m + num_integration_points = 256 + X = self.x + + var_mf_sgcp = VMF_SGCP( + S_borders, + X, + cov_params, + num_inducing_points, + num_integration_points=num_integration_points, + update_hyperparams=False, + output=0, + conv_crit=accuracy, + ) + var_mf_sgcp.run() + sample_paths = var_mf_sgcp.sample_posterior(xtest, num_samples=1.0) + return sample_paths + + def sample(self, verbose=False, steps=1000, domain=None): + """ + :return: + """ + if self.steps is not None: + steps = self.steps + + if self.stepsize is not None: + stepsize = self.stepsize + else: + stepsize = None + + l, Lambda, u = self.get_constraints() + print("Sampling started.") + if self.rate is None: + self.fit_gp() + + if self.sampling == "mirror": + self.sample_mirror_langevin(steps=steps, verbose=verbose) + elif self.sampling == "proximal+prox": + self.sample_proximal_langevin_prox(steps=steps, verbose=verbose) + elif self.sampling == "proximal+simple_prox": + self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose) + elif self.sampling == "hessian": + self.sample_hessian_positive_langevin( + steps=steps, verbose=verbose, stepsize=stepsize + ) + elif self.sampling == "hessian2": + self.sample_hessian_positive_langevin_2( + steps=steps, verbose=verbose, stepsize=stepsize + ) + elif self.sampling == "mla_prime": + self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize) + elif self.sampling == "hmc": + self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize) + elif self.sampling == "polyia_variational": + self.sample_variational(accuracy=1.0 / steps, verbose=verbose) + else: + raise NotImplementedError("Sampling of such is not supported.") + + print("Sampling finished.") + + def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1): + paths = [] + for i in range(samples): + self.sample() + path = self.sample_path_points(xtest).view(1, -1) + paths.append(path) + + paths = torch.cat(paths, dim=0) + lcb = torch.quantile(paths, delta, dim=0) + ucb = torch.quantile(paths, 1 - delta, dim=0) + return lcb, ucb 
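+    # Editor's note (illustrative sketch, not part of the original patch):
+    # the estimators below minimize a penalized Poisson log-likelihood of the form
+    #     -sum_i log(phi(x_i)^T theta) + sum_j tau_j * Phi(A_j)^T theta + (s/2) * ||theta||^2
+    # subject to the linear constraints l <= Lambda @ theta <= u. A minimal
+    # unconstrained cvxpy version (m = number of basis functions; the arrays
+    # `obs_emb` and `int_phi` are hypothetical stand-ins for self.observations
+    # and self.phis) could look like:
+    #     import cvxpy as cp
+    #     theta = cp.Variable(m)
+    #     nll = -cp.sum(cp.log(obs_emb @ theta)) + cp.sum(int_phi @ theta)
+    #     prob = cp.Problem(cp.Minimize(nll + 0.5 * s * cp.sum_squares(theta)), [theta >= 0])
+    #     prob.solve(solver=cp.CLARABEL)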
+ + def penalized_likelihood_fast(self, threads=4): + l, Lambda, u = self.get_constraints() + Gamma_half, invGamma_half = self.cov(inverse=True) + + if self.dual == False: + # using all points without anchor points + if self.observations is not None: + + def objective(theta): + return ( + -torch.sum(torch.log(self.observations @ invGamma_half @ theta)) + + torch.sum(self.phis @ invGamma_half @ theta) + + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + ) + + else: + + def objective(theta): + return torch.sum( + self.phis @ invGamma_half @ theta + ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + + else: + # using anchor points + mask = self.bucketized_counts > 0 + phis = self.varphis[mask, :] + tau = self.total_bucketized_time[mask] + + if self.observations is not None: + observations = self.anchor_points_emb + weights = self.anchor_weights + mask = weights > 0.0 + + def objective(theta): + return ( + -torch.einsum( + "i,i", + weights[mask], + torch.log(observations[mask, :] @ invGamma_half @ theta), + ) + + torch.einsum("i,i", tau, phis @ invGamma_half @ theta) + + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + ) + + else: + + def objective(theta): + return torch.einsum( + "i,i", tau, phis @ invGamma_half @ theta + ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + + if self.rate is not None: + theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() + theta0.data = self.rate.data + else: + theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() + + eps = 1e-4 + res = minimize( + objective, + theta0.numpy(), + backend="torch", + method="L-BFGS-B", + bounds=(l[0] + eps, u[0]), + precision="float64", + tol=1e-8, + options={ + "ftol": 1e-08, + "gtol": 1e-08, + "eps": 1e-08, + "maxfun": 15000, + "maxiter": 15000, + "maxls": 20, + }, + ) + + self.rate = invGamma_half @ torch.from_numpy(res.x) + print(res.message) + return self.rate + + def penalized_likelihood(self, threads=4): + + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + + Gamma_half = self.cov(inverse=False) + + if self.dual == False: + + # using all points without anchor points + phis = self.phis.numpy() + if self.observations is not None: + observations = self.observations.numpy() + objective = cp.Minimize( + -cp.sum(cp.log(observations @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + ) + else: + objective = cp.Minimize( + cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) + ) + + else: + + # using anchor points + mask = self.bucketized_counts.clone().numpy() > 0 + phis = self.varphis[mask, :].clone().numpy() + tau = self.total_bucketized_time[mask].clone().numpy() + + if self.observations is not None: + observations = self.anchor_points_emb.numpy() + weights = self.anchor_weights.numpy() + mask = weights > 0.0 + objective = cp.Minimize( + -cp.sum( + cp.multiply( + weights[mask], cp.log(observations[mask, :] @ theta) + ) + ) + + cp.sum(cp.multiply(tau, phis @ theta)) + + self.s * 0.5 * cp.sum_squares(theta) + ) + else: + objective = cp.Minimize( + cp.sum(cp.multiply(tau, phis @ theta)) + + self.s * 0.5 * cp.sum_squares(theta) + ) + + constraints = [] + + Lambda = Lambda @ Gamma_half.numpy() + + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + prob = cp.Problem(objective, constraints) + + if self.rate is not None: + theta.value = self.rate.numpy() + + try: + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + + self.rate = torch.from_numpy(theta.value) + return 
self.rate + except: + print("Optimization failed. Using the old value.") + print(prob.status) + return self.rate + + def penalized_likelihood_integral(self, threads=4): + + phis = self.phis.numpy() + counts = self.counts.numpy() + + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + objective = cp.Minimize( + -cp.sum(counts @ cp.log(phis @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + ) + + constraints = [] + Lambda = Lambda @ Gamma_half + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + # if self.rate is not None: + # theta.value = self.rate.numpy() + try: + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + self.rate = torch.from_numpy(theta.value) + except: + print("Optimization failed. Using the old value.") + print(prob.status) + + return self.rate + + def bucketization(self): + + phis = [] + observations = [] + + # project sets to smallest forms, and then sum on those only + basic_sets = self.basic_sets + + data_basic = [[] for _ in range(len(basic_sets))] + sensing_times = [[] for _ in range(len(basic_sets))] + counts = torch.zeros(len(basic_sets)).int() + total_data = 0.0 + self.total_bucketized_obs = ( + torch.zeros(size=(len(basic_sets), 1)).double().view(-1) + ) + self.total_bucketized_time = ( + torch.zeros(size=(len(basic_sets), 1)).double().view(-1) + ) + + for sample in self.data: + S, obs, dt = sample + if obs is not None: + total_data = total_data + obs.size()[0] # total counts + for index, elementary in enumerate( + basic_sets + ): # iterate over basic sets + mask = elementary.is_inside( + obs + ) # mask which belong to the elementary + if S.inside(elementary) == True: + data_basic[index].append(obs[mask]) + counts[index] += 1 + sensing_times[index].append(dt) + else: + for index, elementary in enumerate(basic_sets): + if S.inside(elementary) == True: + data_basic[index].append(torch.Tensor([])) + counts[index] += 1 + sensing_times[index].append(dt) + + for index, elementary in enumerate(basic_sets): + arr = np.array( + [int(elem.size()[0]) for elem in data_basic[index]] + ) # counts over sensing rounds + phi = self.packing.integral(elementary) # * counts[index] + + self.total_bucketized_obs[index] = float(np.sum(arr)) + self.total_bucketized_time[index] = float(np.sum(sensing_times[index])) + + observations.append(arr) + phis.append(phi.view(1, -1)) # construct varphi_B + + self.bucketized_obs = ( + observations.copy() + ) # these are number of counts associated with sensings + self.bucketized_time = ( + sensing_times.copy() + ) # these are times each basic set has been sensed + self.bucketized_counts = ( + counts # these are count each basic set has been sensed + ) + + def variance_correction(self, variance): + + if self.var_cor_on == 1: + + g = ( + lambda B, k, mu: -0.5 * (B**2) / ((mu**2) * k) + - B / (mu * k) + + (np.exp(B / (k * mu)) - 1) + ) + gn = lambda k: g(self.U, k, variance) + + from scipy import optimize + + k = optimize.bisect(gn, 1, 10000000) + + return k + else: + return 1.0 + + def least_squares_weighted(self, threads=4): + + # if self.approx_fit == False: + # self.bucketization() + + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + phis = self.varphis[mask, :].clone().numpy() + tau = 
self.total_bucketized_time.clone().numpy() + + variances = self.variances.view(-1).clone().numpy() + + for i in range(variances.shape[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + + selected_variances = variances[mask] + objective = cp.Minimize( + cp.sum_squares( + (cp.multiply((phis @ theta), tau[mask]) - observations) + / (np.sqrt(selected_variances)) + ) + + 0.5 * self.s * cp.norm2(theta) ** 2 + ) + + constraints = [] + Lambda = Lambda @ Gamma_half + # constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + prob = cp.Problem(objective, constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-4, + mosek.dparam.intpnt_co_tol_dfeas: 1e-4, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, + }, + ) + print(prob.status) + self.rate = torch.from_numpy(theta.value) + return self.rate + + def least_sqaures_weighted_fast(self, threads=4): + + l, Lambda, u = self.get_constraints() + Gamma_half, invGamma_half = self.cov(inverse=True) + + mask = self.bucketized_counts > 0 + observations = self.total_bucketized_obs[mask] + phis = self.varphis[mask, :] + tau = self.total_bucketized_time + + variances = self.variances.view(-1) + for i in range(variances.size()[0]): + if mask[i] > 0: + variances[i] = ( + variances[i] + * tau[i] + * self.variance_correction(variances[i] * tau[i]) + ) + selected_variances = variances[mask] + + def objective(theta): + return torch.sum( + ( + (tau[mask] * (phis @ invGamma_half @ theta) - observations) + / (np.sqrt(selected_variances)) + ) + ** 2 + ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + + if self.rate is not None: + theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() + theta0.data = Gamma_half @ self.rate.data + else: + theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() + + eps = 1e-4 + res = minimize( + objective, + theta0.numpy(), + backend="torch", + method="L-BFGS-B", + bounds=(l[0] + eps, u[0]), + precision="float64", + tol=1e-8, + options={ + "ftol": 1e-06, + "gtol": 1e-06, + "eps": 1e-08, + "maxfun": 15000, + "maxiter": 15000, + "maxls": 20, + }, + ) + self.rate = invGamma_half @ torch.from_numpy(res.x) + + return self.rate + + def least_squares_weighted_integral(self, threads=4): + + # if self.approx_fit == False: + # self.bucketization() + + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + + phis = self.phis.clone().numpy() # integrated actions + if self.rate is None: + rate = torch.pinverse(torch.from_numpy(Gamma_half)) @ torch.from_numpy(u) + else: + rate = self.rate.clone() + + if len(self.variances_histogram) > 0: + variances = self.variances_histogram.numpy() + + for i in range(variances.shape[0]): + variances[i] = variances[i] * self.variance_correction(variances[i]) + else: + variances = np.zeros(len(self.data)) + i = 0 + for S, obs, dt in self.data: + variances[i] = S.volume() * self.B + variances[i] = variances[i] * self.variance_correction(variances[i]) + i = i + 1 + + observations = self.counts.clone().numpy() + + objective = cp.Minimize( + cp.sum_squares((phis @ theta - observations) / np.sqrt(variances)) + + self.s * cp.sum_squares(theta) + ) + constraints = [] + Lambda = Lambda @ Gamma_half + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= 
u) + prob = cp.Problem(objective, constraints) + + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-6, + mosek.dparam.intpnt_co_tol_dfeas: 1e-6, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-6, + }, + ) + + self.rate = torch.from_numpy(theta.value) + + return self.rate + + def penalized_likelihood_bins(self, threads=4): + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + + mask = self.bucketized_counts.clone().numpy() > 0 + observations = self.total_bucketized_obs[mask].clone().numpy() + phis = self.varphis[mask, :].clone().numpy() + tau = self.total_bucketized_time[mask].clone().numpy() + + constraints = [] + Lambda = Lambda @ Gamma_half + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + objective = cp.Minimize( + -cp.sum(observations @ cp.log(cp.multiply(tau, phis @ theta))) + + cp.sum(cp.multiply(phis @ theta, tau)) + + self.s * 0.5 * cp.sum_squares(theta) + ) + prob = cp.Problem(objective, constraints) + try: + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + + self.rate = torch.from_numpy(theta.value) + except: + print("optimization failed.") + return self.rate + + def penalized_likelihood_integral_bins(self, threads=4): + phis = self.phis.numpy() + counts = self.counts.numpy() + + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + objective = cp.Minimize( + -cp.sum(counts @ cp.log(phis @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + ) + + constraints = [] + Lambda = Lambda @ Gamma_half + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + try: + if constraints: + prob = cp.Problem(objective, constraints) + else: + prob = cp.Problem(objective) + prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + self.rate = torch.from_numpy(theta.value) + except: + print("Optimization failed. 
Using the old value.") + + return self.rate + + def update_variances(self, value=False, force=False): + self.approx_fit = True + if ( + self.feedback == "count-record" and self.estimator == "least-sq" + ) or force == True: + print("updating variance") + for index, set in enumerate(self.basic_sets): + if value == False: + ucb = self.ucb(set) + self.variances[index] = np.minimum(ucb, self.variances[index]) + else: + self.variances[index] = self.mean_set(set) + else: + if self.data is not None: + if self.peeking == True: + new_var = [] + for S, _, dt in self.data: + new_var.append(float(self.ucb(S)) * dt) + self.variances_histogram = torch.Tensor(new_var.copy()).double() + else: + last = self.data[-1] + new_var = torch.Tensor([self.ucb(last[0]) * last[2]]).double() + if len(self.variances_histogram) > 0: + self.variances_histogram = torch.cat( + (self.variances_histogram, new_var) + ) + else: + self.variances_histogram = new_var + self.approx_fit = False + + def ucb(self, S, dt=1.0, delta=0.5): + + if self.data is None or self.rate is None: + return self.B * S.volume() * dt + + if self.approx == None: + + if self.uncertainty == "laplace": + return self.mean_var_laplace_set(S, dt=dt, beta=self.beta(0))[1] + + elif self.uncertainty == "least-sq": + return self.mean_var_reg_set(S, dt=dt, beta=self.beta(0))[1] + + elif self.uncertainty == "bins": + return self.mean_var_bins_set(S, dt=dt, beta=self.beta(0))[1] + + elif self.uncertainty == "likelihood-ratio": + return self.mean_var_ratio_set(S, dt=dt, beta=self.beta(0))[1] + + elif self.uncertainty == "conformal": + return self.mean_var_conformal_set(S, dt=dt, delta=delta)[2] + + else: + raise AssertionError("Not Implemented.") + + elif self.approx == "ellipsoid": + + if self.approx_fit == False: + self.fit_ellipsoid_approx() + self.beta(0) + print("Fitting Approximation.") + self.approx_fit = True + return self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0))[2] + else: + raise AssertionError("Not implemented.") + + def mean_std_per_action(self, S, W, dt, beta): + + phi = self.packing.integral(S) * dt + Gamma_half = self.cov().numpy() + + l, Lambda, u = self.get_constraints() + + Lambda = Lambda @ Gamma_half + ucb, _ = maximize_on_elliptical_slice( + phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u + ) + lcb, _ = maximize_on_elliptical_slice( + -phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u + ) + map = phi @ self.rate + + return map, float(ucb), -float(lcb) + + def mean_var_laplace_set(self, S, dt, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_laplace() + self.approx_fit = True + return self.mean_std_per_action(S, self.W, dt, beta) + + def mean_var_reg_set(self, S, dt, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_regression() + self.approx_fit = True + return self.mean_std_per_action(S, self.W, dt, beta) + + def mean_var_bins_set(self, S, dt, beta=2.0): + if self.approx_fit == False: + self.W = self.construct_covariance_matrix_bins() + self.approx_fit = True + return self.mean_std_per_action(S, self.W, dt, beta) + + def mean_var_ratio_set(self, S, dt, beta=2.0): + x = self.packing.integral(S) * dt + map = x @ self.rate + # v = np.log(1. 
/ 0.1) - torch.sum(self.counts.double() @ torch.log(self.phis.double() @ self.rate)) \ + # + torch.sum(self.phis.double() @ self.rate) + 0.5 * self.s * torch.norm(self.rate) ** 2 + v = ( + np.log(1.0 / 0.1) + + self.likelihood + + 0.5 * self.s * torch.norm(self.rate) ** 2 + ) + + phis = self.phis.numpy() + counts = self.counts.numpy() + theta = cp.Variable(self.get_m()) + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + + objective_min = cp.Minimize(x @ theta) + objective_max = cp.Maximize(x @ theta) + + constraints = [] + Lambda = Lambda @ Gamma_half + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + constraints.append( + -cp.sum(counts @ cp.log(phis @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + <= v + ) + + prob = cp.Problem(objective_min, constraints) + prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) + lcb = np.dot(theta.value, x) + prob = cp.Problem(objective_max, constraints) + prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) + ucb = np.dot(theta.value, x) + + return map, ucb, lcb + + def map_lcb_ucb_approx_action(self, S, dt=1.0, beta=2.0): + phi = self.packing.integral(S) + map = dt * phi @ self.rate + + ucb = map + beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # ucb = np.minimum(dt * ucb, self.B * S.volume() * dt) + + lcb = map - beta * np.sqrt(phi @ self.W_inv_approx @ phi.T) + # lcb = np.maximum(dt * lcb, self.b * S.volume() * dt) + return map, lcb, ucb + + def fit_ellipsoid_approx(self): + + if self.uncertainty == "laplace": + self.W = self.construct_covariance_matrix_laplace() + elif self.uncertainty == "least-sq": + self.W = self.construct_covariance_matrix_regression() + elif self.uncertainty == "bins": + self.W = self.construct_covariance_matrix_bins() + else: + raise AssertionError("Not implemented.") + + self.W_inv_approx = torch.pinverse(self.W) + + def construct_covariance_matrix(self): + if self.estimator == "likelihood": + self.W = self.construct_covariance_matrix_laplace() + elif self.estimator == "least-sq": + self.W = self.construct_covariance_matrix_regression() + elif self.estimator == "bins": + self.W = self.construct_covariance_matrix_bins() + else: + raise NotImplementedError("This estimator is not implemented.") + return self.W + + def construct_covariance_matrix_laplace(self, theta=None): + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.feedback == "count-record": + + if self.observations is not None: + + if theta is None: + D = torch.diag( + 1.0 / ((self.observations @ self.rate).view(-1) ** 2) + ) + W = self.observations.T @ D @ self.observations + else: + D = torch.diag(1.0 / ((self.observations @ theta).view(-1) ** 2)) + W = self.observations.T @ D @ self.observations + + elif self.feedback == "histogram": + # D = torch.diag(self.counts / (self.phis @ self.rate).view(-1) ** 2) + if len(self.variances_histogram) > 0: + variances = self.variances_histogram.view(-1).clone() + + for i in range(variances.shape[0]): + variances[i] = variances[i] * self.variance_correction(variances[i]) + + D = torch.diag(self.counts / variances**2) + + W = self.phis.T @ D @ self.phis + else: + raise AssertionError("Not implemented.") + + return W + torch.eye(self.get_m()).double() * self.s + + def construct_covariance_matrix_regression(self): + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + if self.data is not None: + variances = self.variances + if self.feedback == "count-record": + mask = self.bucketized_counts > 0 
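+                # Editor's note (descriptive comment, not part of the original patch):
+                # the loop below accumulates the empirical information matrix
+                #     W = sum_B tau_B * varphi_B varphi_B^T / (corrected variance of B),
+                # summing only over basic sets B that have been sensed at least once
+                # (mask); the regularizer self.s * I is added on return.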
+ tau = self.total_bucketized_time + for index_o, o in enumerate(self.bucketized_obs): + n = mask[index_o] + if n > 0: + A = ( + self.varphis[index_o, :].view(-1, 1) + @ self.varphis[index_o, :].view(1, -1) + * tau[index_o] + ) + k = self.variance_correction(tau[index_o] * variances[index_o]) + W = W + A / (variances[index_o] * k) + + elif self.feedback == "histogram": + + if len(self.variances_histogram) > 0: + variances = self.variances_histogram.view(-1).clone() + + for i in range(variances.shape[0]): + variances[i] = variances[i] * self.variance_correction( + variances[i] + ) + + D = torch.diag(1.0 / variances) + W = self.phis.T @ D @ self.phis + + return W + torch.eye(self.get_m()).double() * self.s + + def construct_covariance_matrix_bins(self): + W = torch.zeros(size=(self.get_m(), self.get_m())).double() + + if self.feedback == "count-record": + + mask = self.bucketized_counts > 0 + tau = self.total_bucketized_time + varphis = self.varphis[mask, :] + variances = self.variances.view(-1).clone() + + for i in range(variances.size()[0]): + if mask[i] > 0: + variances[i] = variances[i] * self.variance_correction( + variances[i] * tau[i] + ) + + variances = variances[mask] + tau = tau[mask] + + if self.observations is not None: + D = torch.diag(tau / variances) + W = varphis.T @ D @ varphis + + elif self.feedback == "histogram": + + if len(self.variances_histogram) > 0: + variances = self.variances_histogram.view(-1).clone() + + for i in range(variances.shape[0]): + variances[i] = variances[i] * self.variance_correction(variances[i]) + + D = torch.diag(1.0 / variances) + W = self.phis.T @ D @ self.phis + else: + raise AssertionError("Not implemented.") + + return W + torch.eye(self.get_m()).double() * self.s + + def gap(self, S, actions, w, dt, beta=2.0): + """ + Estimates the gap of an action S, + :param S: + :param dt: + :return: + """ + phi = self.packing.integral(S) * dt + Gamma_half = self.packing.cov().numpy() + + if self.approx is None: + l, Lambda, u = self.get_constraints() + Lambda = Lambda @ Gamma_half + ucbs = [] + for action in actions: + phi_a = self.packing.integral(action) * dt + # ucb, _ = maximize_on_elliptical_slice(phi_a.numpy()-phi.numpy(), self.W.numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) + ucb, _ = maximize_on_elliptical_slice( + phi.numpy(), + self.W.numpy(), + self.rate.view(-1).numpy(), + beta, + l, + Lambda, + u, + ) + ucbs.append(float(ucb)) + gap = torch.max(torch.Tensor(ucbs)) + + else: + if self.data is None: + return (self.B - self.b) * S.volume() + + if self.ucb_identified == False: + print("Recomputing UCB.....") + self.ucb_identified = True + self.fit_ellipsoid_approx() + self.max_ucb = -1000 + self.ucb_action = None + + for action in actions: + _, __, ucb = self.map_lcb_ucb_approx_action( + action, dt=dt, beta=self.beta(0) + ) + ucb = ucb / w(action) + + if ucb > self.max_ucb: + self.max_ucb = ucb + self.ucb_action = action + + map, lcb, ucb = self.map_lcb_ucb_approx_action(S, dt=dt, beta=self.beta(0)) + gap = w(S) * self.max_ucb - lcb + return gap + + def information(self, S, dt, precomputed=None): + + if self.data is None: + return 1.0 + + if self.W is None: + self.construct_covariance_matrix() + + if self.feedback == "count-record": + varphi_UCB = self.packing.integral(self.ucb_action).view(1, -1) * dt + + if precomputed is not None: + Upsilon = precomputed[S] * dt + else: + ind = [] + for index, set in enumerate(self.basic_sets): + if S.inside(set): + ind.append(index) + Upsilon = self.varphis[ind, :] * dt + + I = 
torch.eye(Upsilon.size()[0]).double() + G = ( + self.W_inv_approx + - self.W_inv_approx + @ Upsilon.T + @ torch.inverse(I + Upsilon @ Upsilon.T) + @ Upsilon + @ self.W_inv_approx + ) + return ( + 10e-4 + + torch.logdet(varphi_UCB @ self.W_inv_approx @ varphi_UCB.T) + - torch.logdet(varphi_UCB @ G @ varphi_UCB.T) + ) + + elif self.feedback == "histogram": + + return torch.log( + 1 + + self.packing.integral(S) + @ self.W_inv_approx + @ self.packing.integral(S) + * dt**2 + ) + + def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): + xtest = S.return_discretization(n) + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + self.fit_ellipsoid_approx() + self.fit_ellipsoid_approx() + + Phi = self.packing.embed(xtest).double() + map = Phi @ self.rate + N = Phi.size()[0] + + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :].view(-1, 1) + ucb[i, 0] = np.minimum( + map[i] + beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.B + ) + lcb[i, 0] = np.maximum( + map[i] - beta * np.sqrt(x.T @ self.W_inv_approx @ x), self.b + ) + return map, lcb, ucb + + def map_lcb_ucb(self, S, n, beta=2.0): + """ + Calculate exact confidence using laplace approximation on a whole set domain + :param S: set + :param n: discretization + :param beta: beta + :return: + """ + + xtest = S.return_discretization(n) + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + N = xtest.size()[0] + Phi = self.packing.embed(xtest) + map = Phi @ self.rate + + if self.uncertainty == "laplace": + W = self.construct_covariance_matrix_laplace() + elif self.uncertainty == "least-sq": + W = self.construct_covariance_matrix_regression() + elif self.uncertainty == "bins": + W = self.construct_covariance_matrix_bins() + else: + raise AssertionError("Not implemented ") + + Gamma_half = self.cov().numpy() + l, Lambda, u = self.get_constraints() + Lambda = Lambda @ Gamma_half + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + for i in range(N): + x = Phi[i, :] + ucbi, _ = maximize_on_elliptical_slice( + x.numpy(), + (W).numpy(), + self.rate.view(-1).numpy(), + np.sqrt(beta), + l, + Lambda, + u, + ) + lcbi, _ = maximize_on_elliptical_slice( + -x.numpy(), + (W).numpy(), + self.rate.view(-1).numpy(), + np.sqrt(beta), + l, + Lambda, + u, + ) + ucb[i, 0] = ucbi + lcb[i, 0] = -lcbi + + return map, lcb, ucb + + def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False): + xtest = S.return_discretization(n) + + if self.data is None: + return ( + self.b + 0 * xtest[:, 0].view(-1, 1), + self.b + 0 * xtest[:, 0].view(-1, 1), + self.B + 0 * xtest[:, 0].view(-1, 1), + ) + + N = xtest.size()[0] + Phi = self.packing.embed(xtest) + map = Phi @ self.rate + + ucb = torch.zeros(size=(N, 1)).double() + lcb = torch.zeros(size=(N, 1)).double() + + phis = self.phis.numpy() + + if current: + if self.observations is not None: + v = ( + np.log(1.0 / delta) + - torch.sum(torch.log(self.observations @ self.rate)) + + torch.sum(self.phis @ self.rate) + + self.s * 0.5 * torch.sum(self.rate**2) + ) + else: + v = ( + np.log(1.0 / delta) + + torch.sum(self.phis @ self.rate) + + self.s * 0.5 * torch.sum(self.rate**2) + ) + else: + if self.feedback == "count-record": + v = ( + np.log(1.0 / delta) + + self.loglikelihood + + 0.5 * self.s * 
torch.sum(self.rate**2) + ) + elif self.feedback == "histogram": + v = ( + np.log(1.0 / delta) + + self.loglikelihood + + 0.5 * self.s * torch.sum(self.rate**2) + ) + else: + raise NotImplementedError("Not compatible with given feedback model ") + + l, Lambda, u = self.get_constraints() + Gamma_half = self.cov().numpy() + Lambda = Lambda @ Gamma_half + + for i in range(N): + x = Phi[i, :].numpy() + + theta = cp.Variable(self.get_m()) + + objective_min = cp.Minimize(x @ theta) + objective_max = cp.Maximize(x @ theta) + + constraints = [] + constraints.append(Lambda @ theta >= l) + constraints.append(Lambda @ theta <= u) + + if self.feedback == "count-record": + if self.observations is not None: + observations = self.observations.numpy() + + constraints.append( + -cp.sum(cp.log(observations @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + <= v + ) + else: + constraints.append( + cp.sum(phis @ theta) + self.s * 0.5 * cp.sum_squares(theta) <= v + ) + + elif self.feedback == "histogram": + constraints.append( + -cp.sum(cp.log(phis @ theta)) + + cp.sum(phis @ theta) + + self.s * 0.5 * cp.sum_squares(theta) + <= v + ) + else: + raise NotImplementedError("Does not exist.") + + prob = cp.Problem(objective_min, constraints) + prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) + lcb[i, 0] = float(np.dot(theta.value, x)) + + prob = cp.Problem(objective_max, constraints) + prob.solve(solver=cp.MOSEK, warm_start=False, verbose=False) + ucb[i, 0] = float(np.dot(theta.value, x)) + + return map, lcb, ucb + + def mean_var_conformal_set(self, S, dt, beta=2.0, max_val=None, delta=0.05): + # self.bucketize_prepare() + if max_val is None: + max_val = int(self.B * self.basic_sets[0].volume() * dt) + 1 + map, lcb, ucb = self.conformal_confidence_set( + S, delta=delta, max_val=max_val, dt=dt + ) + return map, lcb, ucb + + def conformal_score_func(self, theta, new, index): + + if new[1] is None: + n_new = 0 + else: + n_new = new[1].size()[0] + + varphi = self.packing.integral(new[0]) * new[2] + err_new = abs(float(n_new) - float(varphi @ theta)) + n = len(self.bucketized_obs[index]) + + if n > 0: + + phis = self.varphis[index].repeat(n, 1) + res = torch.Tensor(self.bucketized_obs[index]).double() + + err = torch.abs(res - (phis @ theta.view(-1, 1)).view(-1)) + + return torch.sum(err < err_new).double() / float(n + 1.0) + 1.0 / ( + float(n) + 1.0 + ) + + else: + return 0.0 + + def conformal_confidence(self, delta=0.05, max_val=20, dt=1, step=1): + lcb = [] + ucb = [] + map = [] + + if self.data is not None: + self.bucketization(time=True) + + for S in self.basic_sets: + m, u, l = self.conformal_confidence_set( + S, delta=delta, max_val=max_val, dt=dt, step=step + ) + + map.append(m) + ucb.append(u) + lcb.append(l) + + return ( + torch.Tensor(map).double(), + torch.Tensor(ucb).double(), + torch.Tensor(lcb).double(), + ) + + def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1.0, step=1): + """ + :return: (lcb,ucb) + """ + + if self.data is not None: + if self.feedback == "count-record": + self.penalized_likelihood() + elif self.feedback == "histogram": + self.penalized_likelihood_integral() + + # identify the set in basic sets + index = 0 + for set in self.basic_sets: + if set.inside(S): + break + index += 1 + + # calculate map estimate + map = float(self.rate @ self.packing.integral(S)) + else: + map = self.b + return map, self.B, self.b + + scores = [] + j = 0 + score = 1.0 + lowest = 0 + n = float(len(self.bucketized_obs[index])) + + while score > np.ceil((1 - 
delta) * (n + 1)) / (n + 1) and j <= max_val: + lowest = j + if j > 0: + obs = torch.zeros(size=(j, self.d)).double() + for i in range(self.d): + obs[:, i] = torch.from_numpy( + np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j) + ) + else: + obs = None + + # new observation + new = (S, obs, dt) + + old_phis, old_observations, old_counts = self.add_data_point_and_remove(new) + + if self.feedback == "count-record": + theta_new = self.penalized_likelihood() + elif self.feedback == "histogram": + theta_new = self.penalized_likelihood_integral() + + # restore back the data + self.phis = old_phis + self.observations = old_observations + self.counts = old_counts + + # calculate the score + score = self.conformal_score_func(theta_new, new, index) + n = float(len(self.bucketized_obs[index])) + + print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1)) + j = j + 1 + + j = max_val + score = 1.0 + largest = max_val + + while score > np.ceil((1 - delta) * (n + 1)) / (n + 1) and j > lowest: + largest = j + if j > 0: + obs = torch.zeros(size=(j, self.d)).double() + for i in range(self.d): + obs[:, i] = torch.from_numpy( + np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j) + ) + else: + obs = None + + # new observation + new = (S, obs, dt) + + old_phis, old_observations, old_counts = self.add_data_point_and_remove(new) + + if self.feedback == "count-record": + theta_new = self.penalized_likelihood() + elif self.feedback == "histogram": + theta_new = self.penalized_likelihood_integral() + + # restore back the data + self.phis = old_phis + self.observations = old_observations + self.counts = old_counts + + # calculate the score + score = self.conformal_score_func(theta_new, new, index) + n = float(len(self.bucketized_obs[index])) + + print(j, "/", max_val, score, np.ceil((1 - delta) * (n + 1)) / (n + 1)) + j = j - 1 + # scores = np.array(scores) + # mask = scores < np.ceil((1-delta)*(n+1))/(n+1) + + # if np.sum(mask) == 0: + # lowest = 0 + # largest = max_val + # else: + # lowest = np.min(np.arange(0,max_val,step)[mask]) + # largest = np.max(np.arange(0, max_val, step)[mask]) + + lcb = lowest / dt / S.volume() + ucb = largest / dt / S.volume() + + return (map, ucb, lcb) diff --git a/stpy/point_processes/positive_basis_estimator.py b/stpy/point_processes/positive_basis_estimator.py index 3d09bc6..d76a422 100644 --- a/stpy/point_processes/positive_basis_estimator.py +++ b/stpy/point_processes/positive_basis_estimator.py @@ -9,125 +9,124 @@ from stpy.borel_set import BorelSet from stpy.point_processes.poisson import PoissonPointProcess -class RateEstimator(): - def __init__(self): - pass - - - def get_min_max(self): - basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) - volumes = [] - for index, elementary in enumerate(basic_sets): - volumes.append(elementary.volume()) - - return (np.min(volumes), np.max(volumes)) - - - - def load_data(self, data, times = True): - self.approx_fit = False - - if len(data) > 0: - self.approx_fit = False - phis = [] - observations = [] - self.data = data.copy() - counts = [] - #times_arr = [] - - for sample in data: - S, obs, dt = sample - count = torch.Tensor([0]) - - if obs is not None: - if times == True: - emb = self.packing.embed(obs) * dt - else: - emb = self.packing.embed(obs) - - phi = self.packing.integral(S) * dt - observations.append(emb) - count = torch.Tensor([emb.size()[0]]) - phis.append(phi.view(1, -1)) - - - if self.dual == True: - self.global_dt = dt - dist_matrix = torch.cdist(obs, self.anchor_points, p = 2) - for k 
in range(obs.size()[0]): - index = torch.argmin(dist_matrix[k,:]) - self.anchor_weights[index] = self.anchor_weights[index] + 1. - else: - phi = self.packing.integral(S) * dt - phis.append(phi.view(1, -1)) - counts.append(count) - - self.counts = torch.cat(counts, dim=0) # n(A_i) - self.phis = torch.cat(phis, dim=0) # integrals of A_i - - if len(observations) > 0: - self.observations = torch.cat(observations, dim=0) # \{x_i\}_{i=1}^{n(A_i)} - else: - self.observations = None - - if self.feedback == "count-record": - self.bucketization() - - def add_data_point(self, new_data, times = True): - self.approx_fit = False - - if self.data is None: - self.load_data([new_data]) - return - - self.data.append(new_data) - - # update standard form data - S, obs, dt = new_data - if obs is not None: - - if times == True: - emb = self.packing.embed(obs) * dt - else: - emb = self.packing.embed(obs) - - phi = self.packing.integral(S).view(1, -1) * dt - - count = torch.Tensor([emb.size()[0]]) - - if self.observations is not None: - self.observations = torch.cat((self.observations, emb), dim=0) - #self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() )) - else: - self.observations = emb - #self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() - - - if self.dual == True: - - dist_matrix = torch.cdist(obs, self.anchor_points, p=2) - for k in range(obs.size()[0]): - index = torch.argmin(dist_matrix[k, :]) - self.anchor_weights[index] += 1. - else: - count = torch.Tensor([0]) - phi = self.packing.integral(S).view(1, -1) * dt - - - self.phis = torch.cat((self.phis, phi), dim=0) - self.counts = torch.cat((self.counts, count)) - - if self.feedback == "count-record": - - for index, elementary in enumerate(self.basic_sets): - - if S.inside(elementary) == True: - if obs is not None: - mask = elementary.is_inside(obs) - self.total_bucketized_obs[index] += float(obs[mask].size()[0]) - else: - self.total_bucketized_obs[index] += 0.0 - - self.bucketized_counts[index] += 1 - self.total_bucketized_time[index] += dt +class RateEstimator: + + def __init__(self): + pass + + def get_min_max(self): + basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) + volumes = [] + for index, elementary in enumerate(basic_sets): + volumes.append(elementary.volume()) + + return (np.min(volumes), np.max(volumes)) + + def load_data(self, data, times=True): + self.approx_fit = False + + if len(data) > 0: + self.approx_fit = False + phis = [] + observations = [] + self.data = data.copy() + counts = [] + # times_arr = [] + + for sample in data: + S, obs, dt = sample + count = torch.Tensor([0]) + + if obs is not None: + if times == True: + emb = self.packing.embed(obs) * dt + else: + emb = self.packing.embed(obs) + + phi = self.packing.integral(S) * dt + observations.append(emb) + count = torch.Tensor([emb.size()[0]]) + phis.append(phi.view(1, -1)) + + if self.dual == True: + self.global_dt = dt + dist_matrix = torch.cdist(obs, self.anchor_points, p=2) + for k in range(obs.size()[0]): + index = torch.argmin(dist_matrix[k, :]) + self.anchor_weights[index] = ( + self.anchor_weights[index] + 1.0 + ) + else: + phi = self.packing.integral(S) * dt + phis.append(phi.view(1, -1)) + counts.append(count) + + self.counts = torch.cat(counts, dim=0) # n(A_i) + self.phis = torch.cat(phis, dim=0) # integrals of A_i + + if len(observations) > 0: + self.observations = torch.cat( + observations, dim=0 + ) # \{x_i\}_{i=1}^{n(A_i)} + else: + self.observations = None + + if self.feedback == 
"count-record": + self.bucketization() + + def add_data_point(self, new_data, times=True): + self.approx_fit = False + + if self.data is None: + self.load_data([new_data]) + return + + self.data.append(new_data) + + # update standard form data + S, obs, dt = new_data + if obs is not None: + + if times == True: + emb = self.packing.embed(obs) * dt + else: + emb = self.packing.embed(obs) + + phi = self.packing.integral(S).view(1, -1) * dt + + count = torch.Tensor([emb.size()[0]]) + + if self.observations is not None: + self.observations = torch.cat((self.observations, emb), dim=0) + # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() )) + else: + self.observations = emb + # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() + + if self.dual == True: + + dist_matrix = torch.cdist(obs, self.anchor_points, p=2) + for k in range(obs.size()[0]): + index = torch.argmin(dist_matrix[k, :]) + self.anchor_weights[index] += 1.0 + else: + count = torch.Tensor([0]) + phi = self.packing.integral(S).view(1, -1) * dt + + self.phis = torch.cat((self.phis, phi), dim=0) + self.counts = torch.cat((self.counts, count)) + + if self.feedback == "count-record": + + for index, elementary in enumerate(self.basic_sets): + + if S.inside(elementary) == True: + if obs is not None: + mask = elementary.is_inside(obs) + self.total_bucketized_obs[index] += float(obs[mask].size()[0]) + else: + self.total_bucketized_obs[index] += 0.0 + + self.bucketized_counts[index] += 1 + self.total_bucketized_time[index] += dt diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index e0b0927..1082251 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -3,208 +3,214 @@ import torch -class RateEstimator(): - - def __init__(self): - pass - - def get_min_max(self): - basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) - volumes = [] - for index, elementary in enumerate(basic_sets): - volumes.append(elementary.volume()) - - return (np.min(volumes), np.max(volumes)) - - def load_data(self, data: List, times=True): - r"""Load the data and save $\Phi(x)$ into `self.observations`, $n(A_i)$ in - `self.counts` and $\int_{A_i} \phi_j(x) dx$ into `self.phis` - - - Parameters - ---------- - data - - List of samples, where each sample is a tuple of - - * The Borel set on which the data lies - * A tensor of the datapoints them selves i.e. of shape - [num_data_points, self.d...] - * The amount of time in minutes that the data spans - i.e. 
max time - min time of all data points - - times, optional - by default True - """ - self.approx_fit = False - - if len(data) > 0: - self.approx_fit = False - phis = [] - observations = [] - self.data = data.copy() - counts = [] - # times_arr = [] - x = [] - for sample in data: - S, obs, dt = sample - count = torch.Tensor([0]) - - if obs is not None: - x.append(obs) - - if obs is not None: - obs, _, duplicates = torch.unique(obs, dim=0, return_inverse=True, return_counts=True) - obs = torch.einsum('ij,i->ij', obs, duplicates) - - if times == True: - emb = self.packing.embed(obs) * dt - else: - emb = self.packing.embed(obs) - - phi = self.packing.integral(S) * dt - observations.append(emb) - count = torch.Tensor([emb.size()[0]]) - phis.append(phi.view(1, -1)) - - if self.dual == True: - self.global_dt = dt - dist_matrix = torch.cdist(obs, self.anchor_points, p=2) - for k in range(obs.size()[0]): - index = torch.argmin(dist_matrix[k, :]) - self.anchor_weights[index] = self.anchor_weights[index] + 1. - else: - phi = self.packing.integral(S) * dt - phis.append(phi.view(1, -1)) - counts.append(count) - - self.counts = torch.cat(counts, dim=0) # n(A_i) - self.phis = torch.cat(phis, dim=0) # integrals of A_i - if len(x) > 0: - self.x = torch.cat(x, dim=0) - else: - self.x = None - - if len(observations) > 0: - self.observations = torch.cat(observations, dim=0) # \{x_i\}_{i=1}^{n(A_i)} - else: - self.observations = None - - if self.feedback == "count-record": - self.bucketization() - - def add_data_point(self, new_data, times=True): - self.approx_fit = False - - if self.data is None: - self.load_data([new_data]) - return - - self.data.append(new_data) - - # update standard form data - S, obs, dt = new_data - if obs is not None: - - if times == True: - emb = self.packing.embed(obs) * dt - else: - emb = self.packing.embed(obs) - - phi = self.packing.integral(S).view(1, -1) * dt - - count = torch.Tensor([emb.size()[0]]) - - if self.observations is not None: - self.observations = torch.cat((self.observations, emb), dim=0) - # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() )) - else: - self.observations = emb - # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() - - if self.dual == True: - - dist_matrix = torch.cdist(obs, self.anchor_points, p=2) - for k in range(obs.size()[0]): - index = torch.argmin(dist_matrix[k, :]) - self.anchor_weights[index] += 1. 
- else: - count = torch.Tensor([0]) - phi = self.packing.integral(S).view(1, -1) * dt - - self.phis = torch.cat((self.phis, phi), dim=0) - self.counts = torch.cat((self.counts, count)) - - if self.feedback == "count-record": - - for index, elementary in enumerate(self.basic_sets): - - if S.inside(elementary) == True: - if obs is not None: - mask = elementary.is_inside(obs) - self.total_bucketized_obs[index] += float(obs[mask].size()[0]) - else: - self.total_bucketized_obs[index] += 0.0 - - self.bucketized_counts[index] += 1 - self.total_bucketized_time[index] += dt - - def get_m(self): - return self.packing.get_m() - - def mean_rate(self, S, n=128): - xtest = S.return_discretization(n) - if self.rate is not None: - return self.packing.embed(xtest) @ self.rate.view(-1, 1) - else: - return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b - - def mean_rate_points(self, xtest): - if self.rate is not None: - return self.packing.embed(xtest) @ self.rate.view(-1, 1) - else: - return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b - - def mean_set(self, S, dt=1): - phi = self.packing.integral(S) * dt - map = phi @ self.rate.view(-1, 1) - return map - - def rate_value(self, x, dt=1): - phi = self.packing.embed(x) * dt - - if self.rate is not None: - map = phi @ self.rate.view(-1, 1) - else: - print("Rate function not fitted!") - map = 0 * phi[:, 0].view(-1, 1) + self.b - - return map - - def sample_value(self, S): - """ - Given a pre-sampled value evaluate certain portions of the domain S - :param S: - :return: - """ - return self.packing.integral(S) @ self.sampled_theta - - def sample_path(self, S, n=128): - xtest = S.return_discretization(n) - return self.packing.embed(xtest) @ self.sampled_theta - - def sample_path_points(self, xtest): - return self.packing.embed(xtest) @ self.sampled_theta.view(-1, 1) - - def get_observations(self): - if self.data is not None: - points = [] - for datapoint in self.data: - if datapoint[1] is not None: - points.append(datapoint[1]) - if len(points) > 0: - return torch.vstack(points) - else: - return None - else: - return None +class RateEstimator: + + def __init__(self): + pass + + def get_min_max(self): + basic_sets = self.hierarchy.get_sets_level(self.hierarchy.levels) + volumes = [] + for index, elementary in enumerate(basic_sets): + volumes.append(elementary.volume()) + + return (np.min(volumes), np.max(volumes)) + + def load_data(self, data: List, times=True): + r"""Load the data and save $\Phi(x)$ into `self.observations`, $n(A_i)$ in + `self.counts` and $\int_{A_i} \phi_j(x) dx$ into `self.phis` + + + Parameters + ---------- + data + + List of samples, where each sample is a tuple of + + * The Borel set on which the data lies + * A tensor of the datapoints them selves i.e. of shape + [num_data_points, self.d...] + * The amount of time in minutes that the data spans + i.e. 
max time - min time of all data points + + times, optional + by default True + """ + self.approx_fit = False + + if len(data) > 0: + self.approx_fit = False + phis = [] + observations = [] + self.data = data.copy() + counts = [] + # times_arr = [] + x = [] + for sample in data: + S, obs, dt = sample + count = torch.Tensor([0]) + + if obs is not None: + x.append(obs) + + if obs is not None: + obs, _, duplicates = torch.unique( + obs, dim=0, return_inverse=True, return_counts=True + ) + obs = torch.einsum("ij,i->ij", obs, duplicates) + + if times == True: + emb = self.packing.embed(obs) * dt + else: + emb = self.packing.embed(obs) + + phi = self.packing.integral(S) * dt + observations.append(emb) + count = torch.Tensor([emb.size()[0]]) + phis.append(phi.view(1, -1)) + + if self.dual == True: + self.global_dt = dt + dist_matrix = torch.cdist(obs, self.anchor_points, p=2) + for k in range(obs.size()[0]): + index = torch.argmin(dist_matrix[k, :]) + self.anchor_weights[index] = ( + self.anchor_weights[index] + 1.0 + ) + else: + phi = self.packing.integral(S) * dt + phis.append(phi.view(1, -1)) + counts.append(count) + + self.counts = torch.cat(counts, dim=0) # n(A_i) + self.phis = torch.cat(phis, dim=0) # integrals of A_i + if len(x) > 0: + self.x = torch.cat(x, dim=0) + else: + self.x = None + + if len(observations) > 0: + self.observations = torch.cat( + observations, dim=0 + ) # \{x_i\}_{i=1}^{n(A_i)} + else: + self.observations = None + + if self.feedback == "count-record": + self.bucketization() + + def add_data_point(self, new_data, times=True): + self.approx_fit = False + + if self.data is None: + self.load_data([new_data]) + return + + self.data.append(new_data) + + # update standard form data + S, obs, dt = new_data + if obs is not None: + + if times == True: + emb = self.packing.embed(obs) * dt + else: + emb = self.packing.embed(obs) + + phi = self.packing.integral(S).view(1, -1) * dt + + count = torch.Tensor([emb.size()[0]]) + + if self.observations is not None: + self.observations = torch.cat((self.observations, emb), dim=0) + # self.times = torch.cat((self.times, dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() )) + else: + self.observations = emb + # self.times = dt * torch.ones(size=(emb.size()[0],1)).view(-1).double() + + if self.dual == True: + + dist_matrix = torch.cdist(obs, self.anchor_points, p=2) + for k in range(obs.size()[0]): + index = torch.argmin(dist_matrix[k, :]) + self.anchor_weights[index] += 1.0 + else: + count = torch.Tensor([0]) + phi = self.packing.integral(S).view(1, -1) * dt + + self.phis = torch.cat((self.phis, phi), dim=0) + self.counts = torch.cat((self.counts, count)) + + if self.feedback == "count-record": + + for index, elementary in enumerate(self.basic_sets): + + if S.inside(elementary) == True: + if obs is not None: + mask = elementary.is_inside(obs) + self.total_bucketized_obs[index] += float(obs[mask].size()[0]) + else: + self.total_bucketized_obs[index] += 0.0 + + self.bucketized_counts[index] += 1 + self.total_bucketized_time[index] += dt + + def get_m(self): + return self.packing.get_m() + + def mean_rate(self, S, n=128): + xtest = S.return_discretization(n) + if self.rate is not None: + return self.packing.embed(xtest) @ self.rate.view(-1, 1) + else: + return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b + + def mean_rate_points(self, xtest): + if self.rate is not None: + return self.packing.embed(xtest) @ self.rate.view(-1, 1) + else: + return self.packing.embed(xtest)[:, 0].view(-1, 1) * 0 + self.b + + def mean_set(self, 
S, dt=1): + phi = self.packing.integral(S) * dt + map = phi @ self.rate.view(-1, 1) + return map + + def rate_value(self, x, dt=1): + phi = self.packing.embed(x) * dt + + if self.rate is not None: + map = phi @ self.rate.view(-1, 1) + else: + print("Rate function not fitted!") + map = 0 * phi[:, 0].view(-1, 1) + self.b + + return map + + def sample_value(self, S): + """ + Given a pre-sampled value evaluate certain portions of the domain S + :param S: + :return: + """ + return self.packing.integral(S) @ self.sampled_theta + + def sample_path(self, S, n=128): + xtest = S.return_discretization(n) + return self.packing.embed(xtest) @ self.sampled_theta + + def sample_path_points(self, xtest): + return self.packing.embed(xtest) @ self.sampled_theta.view(-1, 1) + + def get_observations(self): + if self.data is not None: + points = [] + for datapoint in self.data: + if datapoint[1] is not None: + points.append(datapoint[1]) + if len(points) > 0: + return torch.vstack(points) + else: + return None + else: + return None diff --git a/stpy/point_processes/seasonal_point_process.py b/stpy/point_processes/seasonal_point_process.py index 3590e71..50cb824 100644 --- a/stpy/point_processes/seasonal_point_process.py +++ b/stpy/point_processes/seasonal_point_process.py @@ -6,77 +6,98 @@ class SeasonalPoissonPointProcess(PoissonPointProcess): - def __init__(self, *args, seasonality=lambda t: 1., **kwargs): - self.seasonality = seasonality - - def rate_default(self, x, t, dt=1.): - return (self.B * torch.sum(torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1).view(-1, - 1) + self.b) * dt - - def rate_volume(self, S, t, dt=1, rate=None): - if self.rate_volume_f is None: - # integrate rate numerically over S - import scipy.integrate as integrate - if rate is None: - rate = self.rate - else: - rate = rate - integral = 0 - if self.d == 1: - # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) - integral, _ = integrate.quad(lambda x: rate(torch.Tensor([x]).view(1, 1), t).numpy(), - float(S.bounds[0, 0]), float(S.bounds[0, 1])) - elif self.d == 2: - integrand = lambda x, y: rate(torch.Tensor([x, y], t).view(1, 2).double()).numpy() - integral, _ = integrate.dblquad(integrand, float(S.bounds[0, 0]), float(S.bounds[0, 1]), - lambda x: float(S.bounds[1, 0]), lambda x: float(S.bounds[1, 1])) - - return integral * dt - else: - return self.rate_volume_f(S) * dt - - def sample(self, S, t, dt=1., verbose=False, rate=None): - """ - - :param S: set where it should be sampled - :return: - """ - if self.exact == True: - return self.sample_discretized(S, t, dt=dt) - else: - - lam = self.rate_volume(S, t, dt) - n = np.random.poisson(lam=lam) - new_sample = [] - vol = S.volume() - size = 0 - - alpha = 1. 
/ lam - - while size < n: - # uniform sample g(s) = 1/vol(S) - sample = S.uniform_sample(1) - - t = self.rate(sample, t) / (alpha * lam) - p = np.random.uniform(0, 1) - if p < t: - new_sample.append(sample.view(1, -1)) - size = size + 1 - - if len(new_sample) > 1: - x = torch.cat(new_sample, dim=0) - else: - return None - return x - - def sample_discretized(self, S, t, dt, n=50): - lam = float(self.rate_volume(S, t, dt)) - count = np.random.poisson(lam=lam) - if count > 0: - x = S.return_discretization(n) - r = self.rate(x, t) * dt - sample = torch.from_numpy( - np.random.choice(np.arange(0, x.size()[0], 1), size=count, p=(r / torch.sum(r)).numpy().reshape(-1))) - return x[sample, :] - else: - return None + def __init__(self, *args, seasonality=lambda t: 1.0, **kwargs): + self.seasonality = seasonality + + def rate_default(self, x, t, dt=1.0): + return ( + self.B + * torch.sum( + torch.exp(-(x + 1)) * torch.sin(2 * x * np.pi) ** 2, dim=1 + ).view(-1, 1) + + self.b + ) * dt + + def rate_volume(self, S, t, dt=1, rate=None): + if self.rate_volume_f is None: + # integrate rate numerically over S + import scipy.integrate as integrate + + if rate is None: + rate = self.rate + else: + rate = rate + integral = 0 + if self.d == 1: + # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) + integral, _ = integrate.quad( + lambda x: rate(torch.Tensor([x]).view(1, 1), t).numpy(), + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + ) + elif self.d == 2: + integrand = lambda x, y: rate( + torch.Tensor([x, y], t).view(1, 2).double() + ).numpy() + integral, _ = integrate.dblquad( + integrand, + float(S.bounds[0, 0]), + float(S.bounds[0, 1]), + lambda x: float(S.bounds[1, 0]), + lambda x: float(S.bounds[1, 1]), + ) + + return integral * dt + else: + return self.rate_volume_f(S) * dt + + def sample(self, S, t, dt=1.0, verbose=False, rate=None): + """ + + :param S: set where it should be sampled + :return: + """ + if self.exact == True: + return self.sample_discretized(S, t, dt=dt) + else: + + lam = self.rate_volume(S, t, dt) + n = np.random.poisson(lam=lam) + new_sample = [] + vol = S.volume() + size = 0 + + alpha = 1.0 / lam + + while size < n: + # uniform sample g(s) = 1/vol(S) + sample = S.uniform_sample(1) + + t = self.rate(sample, t) / (alpha * lam) + p = np.random.uniform(0, 1) + if p < t: + new_sample.append(sample.view(1, -1)) + size = size + 1 + + if len(new_sample) > 1: + x = torch.cat(new_sample, dim=0) + else: + return None + return x + + def sample_discretized(self, S, t, dt, n=50): + lam = float(self.rate_volume(S, t, dt)) + count = np.random.poisson(lam=lam) + if count > 0: + x = S.return_discretization(n) + r = self.rate(x, t) * dt + sample = torch.from_numpy( + np.random.choice( + np.arange(0, x.size()[0], 1), + size=count, + p=(r / torch.sum(r)).numpy().reshape(-1), + ) + ) + return x[sample, :] + else: + return None diff --git a/stpy/probability/bernoulli_likelihood.py b/stpy/probability/bernoulli_likelihood.py index 2630337..14148e4 100644 --- a/stpy/probability/bernoulli_likelihood.py +++ b/stpy/probability/bernoulli_likelihood.py @@ -6,79 +6,103 @@ from stpy.probability.gaussian_likelihood import GaussianLikelihood import scipy + class BernoulliLikelihoodCanonical(GaussianLikelihood): def __init__(self): super().__init__() - def evaluate_datapoint(self, theta, d, mask = None): + def evaluate_datapoint(self, theta, d, mask=None): if mask is None: - mask = 1. 
+ mask = 1.0 x, y = d - r = -y*(x@theta) + torch.log(1+torch.exp(x@theta)) + r = -y * (x @ theta) + torch.log(1 + torch.exp(x @ theta)) r = r * mask return r def link(self, s): - return 1./(1.+ torch.exp(-s)) + return 1.0 / (1.0 + torch.exp(-s)) - def scale(self, mask = None): - return 1. + def scale(self, mask=None): + return 1.0 - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): if mask is None: + def likelihood(theta): - return -self.y.T@(self.x @ theta) + cp.sum(cp.logistic(self.x @ theta)) + return -self.y.T @ (self.x @ theta) + cp.sum( + cp.logistic(self.x @ theta) + ) + else: + def likelihood(theta): - if torch.sum(mask.double())>1e-8: - return -(mask*self.y)@(self.x @ theta) + mask @ cp.logistic(self.x @ theta) + if torch.sum(mask.double()) > 1e-8: + return -(mask * self.y) @ (self.x @ theta) + mask @ cp.logistic( + self.x @ theta + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) + return likelihood def lipschitz_constant(self, b): return np.exp(b) - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str, None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = params["estimate"] + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) - B = params['bound'] - d_eff = params['d_eff'] + B = params["bound"] + d_eff = params["d_eff"] - if type in ['faubry']: - D = torch.diag(1./(self.x @ theta_fit).view(-1)) + if type in ["faubry"]: + D = torch.diag(1.0 / (self.x @ theta_fit).view(-1)) V = self.x.T @ D @ self.x + H - beta = np.sqrt(lam*B) / 2. + 2. / np.sqrt(lam*B) * (torch.logdet(V) - torch.logdet(H)) + 2 / np.sqrt( - lam*B) * np.log(1 / delta) * d_eff + beta = ( + np.sqrt(lam * B) / 2.0 + + 2.0 / np.sqrt(lam * B) * (torch.logdet(V) - torch.logdet(H)) + + 2 / np.sqrt(lam * B) * np.log(1 / delta) * d_eff + ) L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) - elif type in ['laplace']: - sigma = 1./4. + elif type in ["laplace"]: + sigma = 1.0 / 4.0 V = self.x.T @ self.x / sigma**2 + H L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) - beta = 2. * self.lipschitz_constant(B) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + beta = 2.0 * self.lipschitz_constant(B) + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type in ["adaptive-AB"]: - sigma = 1./4. + sigma = 1.0 / 4.0 V = self.x.T @ self.x / sigma**2 + H L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) - beta = 2 * np.log(1. 
/ delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B - self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta] + beta = ( + 2 * np.log(1.0 / delta) + + (torch.logdet(V + H) - torch.logdet(H)) + + lam * B + ) + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type == "LR": @@ -86,7 +110,9 @@ def get_confidence_set_cvxpy(self, set = self.lr_confidence_set_cvxpy(theta, beta, params) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." + ) self.set = set self.fitted = True @@ -94,35 +120,43 @@ def get_confidence_set_cvxpy(self, return set def information_matrix(self): - V = self.x.T@self.x/self.sigma + V = self.x.T @ self.x / self.sigma return V - def confidence_parameter(self, delta, params, type = None): - H = params['regularizer_hessian'] + def confidence_parameter(self, delta, params, type=None): + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) - B = params['bound'] - d_eff = params['d_eff'] + B = params["bound"] + d_eff = params["d_eff"] if type is None or type == "none" or type == "laplace": # this is a common heuristic - beta = 2.0 + beta = 2.0 elif type == "adaptive-AB": - sigma = 1./4. - V = self.x.T @ self.x / sigma ** 2 + H - beta = 2 * np.log(1. / delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B + sigma = 1.0 / 4.0 + V = self.x.T @ self.x / sigma**2 + H + beta = ( + 2 * np.log(1.0 / delta) + + (torch.logdet(V + H) - torch.logdet(H)) + + lam * B + ) elif type == "LR": # this is based on sequential LR test beta = self.confidence_parameter_likelihood_ratio(delta, params) elif type == "Faubry": - H = params['regularizer_hessian'] - lam = H[0., 0] - theta_fit = params['estimate'] - D = torch.diag(1./(self.x @ theta_fit).view(-1)) + H = params["regularizer_hessian"] + lam = H[0.0, 0] + theta_fit = params["estimate"] + D = torch.diag(1.0 / (self.x @ theta_fit).view(-1)) V = self.x.T @ D @ self.x + H - beta = np.sqrt(lam)/2. 
+ 2./np.sqrt(lam)*(torch.logdet(V) - torch.logdet(H)) + 2/np.sqrt(lam)* np.log(1/delta)*d_eff + beta = ( + np.sqrt(lam) / 2.0 + + 2.0 / np.sqrt(lam) * (torch.logdet(V) - torch.logdet(H)) + + 2 / np.sqrt(lam) * np.log(1 / delta) * d_eff + ) else: raise NotImplementedError("Not implemented") return beta diff --git a/stpy/probability/gaussian_likelihood.py b/stpy/probability/gaussian_likelihood.py index bdbf2fe..53a66e7 100644 --- a/stpy/probability/gaussian_likelihood.py +++ b/stpy/probability/gaussian_likelihood.py @@ -5,24 +5,27 @@ from stpy.probability.likelihood import Likelihood import scipy + class GaussianLikelihood(Likelihood): - def __init__(self, sigma = 0.1, Sigma=None): + def __init__(self, sigma=0.1, Sigma=None): super().__init__() self.sigma = sigma self.Sigma = Sigma - def scale(self, err = None, bound = None): + def scale(self, err=None, bound=None): if self.Sigma is None: return self.sigma**2 else: - return torch.max(self.Sigma.T@self.Sigma) + return torch.max(self.Sigma.T @ self.Sigma) def evaluate_log(self, f): if self.Sigma is None: - res = torch.sum((f - self.y)**2)/self.sigma**2 + res = torch.sum((f - self.y) ** 2) / self.sigma**2 else: - res = ((f - self.y).T @ torch.inverse(self.Sigma.T@self.Sigma) @ (f - self.y) ) + res = ( + (f - self.y).T @ torch.inverse(self.Sigma.T @ self.Sigma) @ (f - self.y) + ) return res def load_data(self, D): @@ -30,81 +33,119 @@ def load_data(self, D): self.fitted = False def add_data_point(self, d): - x,y = d - self.x = torch.vstack(self.x,x) - self.y = torch.vstack(self.y,y) + x, y = d + self.x = torch.vstack(self.x, x) + self.y = torch.vstack(self.y, y) self.fitted = False - def evaluate_datapoint(self, theta, d, mask = None): - x,y = d + def evaluate_datapoint(self, theta, d, mask=None): + x, y = d if mask is None: - mask = 1. 
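For scalar noise, the per-point Gaussian term in this hunk, (x @ theta - y)**2 / (2 * sigma**2), sums to an ordinary least-squares objective, which is what get_objective_cvxpy in this file builds. A minimal standalone sketch with made-up data; the small ridge term stands in for the regularizer that the surrounding estimators normally supply and is purely illustrative:

import cvxpy as cp
import numpy as np

X = np.random.randn(20, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * np.random.randn(20)
sigma, reg = 0.1, 1e-3

theta = cp.Variable(3)
objective = cp.sum_squares(X @ theta - y) / (2 * sigma**2) + reg * cp.sum_squares(theta)
cp.Problem(cp.Minimize(objective)).solve()
print(theta.value)
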
+ mask = 1.0 if self.Sigma is None: - return mask*((x @ theta - y) ** 2)/ (2*self.sigma ** 2) + return mask * ((x @ theta - y) ** 2) / (2 * self.sigma**2) else: - return mask*(x @ theta - y).T @ torch.linalg.inv(self.Sigma.T @ self.Sigma) @ ( - x @ theta - y) + return ( + mask + * (x @ theta - y).T + @ torch.linalg.inv(self.Sigma.T @ self.Sigma) + @ (x @ theta - y) + ) def normalization(self, d): - return 1./np.sqrt(2.*np.pi*self.sigma**2) + return 1.0 / np.sqrt(2.0 * np.pi * self.sigma**2) def get_objective_torch(self): if self.Sigma is None: - def likelihood(theta): return torch.sum((self.x@theta - self.y)**2)/(2*self.sigma**2) + + def likelihood(theta): + return torch.sum((self.x @ theta - self.y) ** 2) / (2 * self.sigma**2) else: - def likelihood(theta): return (self.x@theta - self.y).T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@(self.x@theta - self.y) + + def likelihood(theta): + return ( + (self.x @ theta - self.y).T + @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2) + @ (self.x @ theta - self.y) + ) + return likelihood - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): if mask is None: if self.Sigma is None: - def likelihood(theta): return cp.sum_squares(self.x@theta - self.y)/(2*self.sigma**2) + + def likelihood(theta): + return cp.sum_squares(self.x @ theta - self.y) / (2 * self.sigma**2) else: - def likelihood(theta): return cp.matrix_frac(self.x@theta - self.y,2*self.Sigma.T@self.Sigma) + + def likelihood(theta): + return cp.matrix_frac( + self.x @ theta - self.y, 2 * self.Sigma.T @ self.Sigma + ) + else: if self.Sigma is None: + def likelihood(theta): - if torch.sum(mask.int())>1e-8: - return cp.sum_squares(cp.multiply(mask.double().view(-1,1),(self.x @ theta - self.y)) )/ (2*self.sigma ** 2) + if torch.sum(mask.int()) > 1e-8: + return cp.sum_squares( + cp.multiply( + mask.double().view(-1, 1), (self.x @ theta - self.y) + ) + ) / (2 * self.sigma**2) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) else: + def likelihood(theta): - if torch.sum(mask.int())>1e-8: - return cp.matrix_frac(cp.multiply(mask.double().view(-1,1),(self.x @ theta - self.y)), 2*self.Sigma.T @ self.Sigma) + if torch.sum(mask.int()) > 1e-8: + return cp.matrix_frac( + cp.multiply( + mask.double().view(-1, 1), (self.x @ theta - self.y) + ), + 2 * self.Sigma.T @ self.Sigma, + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) + return likelihood - def information_matrix(self, mask = None): + def information_matrix(self, mask=None): if mask is None: if self.Sigma is None: - V = self.x.T@self.x/(2*self.sigma**2) + V = self.x.T @ self.x / (2 * self.sigma**2) else: - V = self.x.T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@self.x + V = self.x.T @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2) @ self.x return V else: if self.Sigma is None: - V = self.x[mask,:].T@self.x[mask,:]/(2*self.sigma**2) + V = self.x[mask, :].T @ self.x[mask, :] / (2 * self.sigma**2) else: - V = self.x[mask,:].T@torch.linalg.inv(self.Sigma.T@self.Sigma*2)@self.x[mask,:] + V = ( + self.x[mask, :].T + @ torch.linalg.inv(self.Sigma.T @ self.Sigma * 2) + @ self.x[mask, :] + ) return V - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str,None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = 
params["estimate"] + H = params["regularizer_hessian"] if H is not None: V = self.information_matrix() + H @@ -112,23 +153,29 @@ def get_confidence_set_cvxpy(self, V = self.information_matrix() if type in ["none", None, "fixed"]: -# L = torch.linalg.cholesky(V).double() + # L = torch.linalg.cholesky(V).double() L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) beta = self.confidence_parameter(delta, params, type=type) - self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta] + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type in ["adaptive-AB"]: L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) beta = self.confidence_parameter(delta, params, type=type) - self.set_fn = lambda theta: [cp.sum_squares(L@(theta - theta_fit)) <= beta] + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type in ["adaptive-optimized"]: beta = self.confidence_parameter(delta, params, type=type) sqrtV = scipy.linalg.sqrtm(V) - L = torch.linalg.cholesky(V+sqrtV).double() - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + L = torch.linalg.cholesky(V + sqrtV).double() + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] elif type == "LR": beta = self.confidence_parameter_likelihood_ratio(delta, params) @@ -139,20 +186,22 @@ def get_confidence_set_cvxpy(self, beta = self.confidence_parameter_prior_posterior(delta, params) set = self.prior_posterior_lr_confidence_set_cvxpy(theta, beta, params) else: - raise NotImplementedError("The desired confidence set type is not supported.") - print (type, "USING BETA: ", beta) + raise NotImplementedError( + "The desired confidence set type is not supported." + ) + print(type, "USING BETA: ", beta) self.set = set self.fitted = True return set - def confidence_parameter(self, delta, params, type = None): - print (type) + def confidence_parameter(self, delta, params, type=None): + print(type) if type is None or type == "none": # this is a common heuristic - beta = 2.0 * np.log(1/delta) + beta = 2.0 * np.log(1 / delta) # elif type == "LR" or type == "LR-vovk": # # this is based on sequential LR test @@ -160,26 +209,37 @@ def confidence_parameter(self, delta, params, type = None): # beta = self.confidence_parameter_likelihood_ratio(delta, params) else: - if 'd_eff' in params.keys(): + if "d_eff" in params.keys(): n = self.x.size()[0] - d = params['d_eff'] + d = params["d_eff"] else: - d = params['m'] + d = params["m"] - B = params['bound'] - H = params['regularizer_hessian'] + B = params["bound"] + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) if type == "fixed": # this is fixed design - beta = d + 2 * np.log(1 / delta) + 2 * np.sqrt(d * np.log(1 / delta)) + lam*B + beta = ( + d + + 2 * np.log(1 / delta) + + 2 * np.sqrt(d * np.log(1 / delta)) + + lam * B + ) elif type == "adaptive-AB": - print ("calculating: adaptive-AB") + print("calculating: adaptive-AB") # this takes the pseudo-maximization with a fixed mixture V = self.information_matrix() - beta = 2*np.log(1./delta) + (torch.logdet(V+H) - torch.logdet(H)) + lam*B + beta = ( + 2 * np.log(1.0 / delta) + + (torch.logdet(V + H) - torch.logdet(H)) + + lam * B + ) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." 
+ ) - return beta \ No newline at end of file + return beta diff --git a/stpy/probability/huber_likelihood.py b/stpy/probability/huber_likelihood.py index f9321c7..66f7180 100644 --- a/stpy/probability/huber_likelihood.py +++ b/stpy/probability/huber_likelihood.py @@ -8,7 +8,7 @@ class HuberLikelihood(GaussianLikelihood): - def __init__(self, sigma=0.1, M=1.): + def __init__(self, sigma=0.1, M=1.0): super().__init__() self.sigma = sigma self.M = M @@ -16,23 +16,23 @@ def __init__(self, sigma=0.1, M=1.): def evaluate_log(self, f): pass - def scale(self, err = None): + def scale(self, err=None): if self.Sigma is None: return self.sigma**2 else: - return torch.max(self.Sigma.T@self.Sigma) + return torch.max(self.Sigma.T @ self.Sigma) - def evaluate_datapoint(self, theta, d, mask = None): + def evaluate_datapoint(self, theta, d, mask=None): if mask is None: - mask = 1. + mask = 1.0 x, y = d res = (x @ theta - y) / self.sigma mask1 = torch.abs(res) < self.M mask2 = torch.abs(res) >= self.M v = res v[mask1] = res[mask1] ** 2 - v[mask2] = 2 * self.M * torch.abs(res[mask2]) - self.M ** 2 - return torch.sum(v)*mask + v[mask2] = 2 * self.M * torch.abs(res[mask2]) - self.M**2 + return torch.sum(v) * mask def add_data_point(self, d): x, y = d @@ -46,20 +46,27 @@ def load_data(self, D): def get_objective_cvxpy(self, mask=None): if mask is None: + def likelihood(theta): return cp.sum(cp.huber((self.x @ theta - self.y) / self.sigma)) + else: + def likelihood(theta): if torch.sum(mask.int()) > 0: - return cp.sum(cp.huber((self.x[mask, :] @ theta - self.y[mask, :]) / self.sigma)) + return cp.sum( + cp.huber( + (self.x[mask, :] @ theta - self.y[mask, :]) / self.sigma + ) + ) else: return cp.sum(theta * 0) + return likelihood def information_matrix(self): V = self.x.T @ self.x / self.sigma return V - def get_objective_torch(self): raise NotImplementedError("Implement me please.") diff --git a/stpy/probability/laplace_likelihood.py b/stpy/probability/laplace_likelihood.py index 732c82c..afb4912 100644 --- a/stpy/probability/laplace_likelihood.py +++ b/stpy/probability/laplace_likelihood.py @@ -6,62 +6,76 @@ from stpy.probability.likelihood import Likelihood from stpy.probability.gaussian_likelihood import GaussianLikelihood + class LaplaceLikelihood(GaussianLikelihood): - def __init__(self, b = 0.1): + def __init__(self, b=0.1): super().__init__() self.b = b - def scale(self, err = None, bound = None): + def scale(self, err=None, bound=None): return self.b def evaluate_log(self, f): - res = torch.sum(torch.abs(f - self.y))/self.b + res = torch.sum(torch.abs(f - self.y)) / self.b return res - def evaluate_datapoint(self, theta, d, mask = None): + def evaluate_datapoint(self, theta, d, mask=None): if mask is None: - mask = 1. 
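The Huber objective in the previous hunk and the Laplace objective here differ only in the penalty applied to the scaled residuals: cp.huber(r, M) is r**2 for |r| <= M and 2*M*|r| - M**2 beyond that, while the Laplace likelihood charges |r| everywhere (scaled by b). A minimal sketch with made-up residual values, evaluating both penalties numerically:

import cvxpy as cp
import numpy as np

r = np.linspace(-3.0, 3.0, 7)
print(cp.huber(r, M=1).value)   # quadratic near zero, linear in the tails
print(cp.abs(r).value)          # purely linear, as in the Laplace objective
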
+ mask = 1.0 x, y = d - return mask* (torch.abs(x @ theta - y)) / self.b + return mask * (torch.abs(x @ theta - y)) / self.b - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): if mask is None: - def likelihood(theta): return cp.sum(cp.abs(self.x@theta - self.y)/self.b) + + def likelihood(theta): + return cp.sum(cp.abs(self.x @ theta - self.y) / self.b) + else: + def likelihood(theta): - if torch.sum(mask.int())>0: - return cp.sum(cp.abs(self.x[mask,:]@theta - self.y[mask,:])/self.b) + if torch.sum(mask.int()) > 0: + return cp.sum( + cp.abs(self.x[mask, :] @ theta - self.y[mask, :]) / self.b + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) + return likelihood - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str, None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = params["estimate"] + H = params["regularizer_hessian"] if H is not None: V = self.information_matrix() + H else: V = self.information_matrix() - if type in ["none","sub-exp"]: + if type in ["none", "sub-exp"]: L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) beta = self.confidence_parameter(delta, params, type=type) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type == "adaptive-AB": L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) beta = self.confidence_parameter(delta, params, type=type) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) elif type == "LR": @@ -69,7 +83,9 @@ def get_confidence_set_cvxpy(self, set = self.lr_confidence_set_cvxpy(theta, beta, params) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." 
+ ) print(type, "USING BETA: ", beta) self.set = set @@ -78,49 +94,62 @@ def get_confidence_set_cvxpy(self, return set def information_matrix(self): - V = self.x.T@self.x/(2*self.b)**2 + V = self.x.T @ self.x / (2 * self.b) ** 2 return V - - def get_objective_torch(self, mask = None): + def get_objective_torch(self, mask=None): if mask is None: - def likelihood(theta): return torch.sum(torch.abs(self.x@theta - self.y)/self.sigma) + + def likelihood(theta): + return torch.sum(torch.abs(self.x @ theta - self.y) / self.sigma) + else: + def likelihood(theta): - if torch.sum(mask.int())>0: - return torch.sum(torch.abs(self.x[mask,:]@theta - self.y[mask,:])/self.sigma) + if torch.sum(mask.int()) > 0: + return torch.sum( + torch.abs(self.x[mask, :] @ theta - self.y[mask, :]) + / self.sigma + ) else: - return torch.sum(theta*0) - return likelihood - + return torch.sum(theta * 0) + return likelihood - def confidence_parameter(self, delta, params, type = None): - print (type) + def confidence_parameter(self, delta, params, type=None): + print(type) if type is None or type == "none": - beta = 2.0 * np.log(1/delta) + beta = 2.0 * np.log(1 / delta) else: - if 'd_eff' in params.keys(): + if "d_eff" in params.keys(): n = self.x.size()[0] - d = params['d_eff'] + d = params["d_eff"] else: - d = params['m'] + d = params["m"] - B = params['bound'] - H = params['regularizer_hessian'] + B = params["bound"] + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) if type == "sub-exp": # this takes the pseudo-maximization with a fixed mixture V = self.information_matrix() - L = 1. + L = 1.0 size = V.size()[0] - beta = (lam*(B + self.b/L) + L/(self.b*np.sqrt(lam))*(d*np.log(2)+np.log(1./delta)+0.5*torch.slogdet(V*lam+torch.eye(size))[1])) + beta = lam * (B + self.b / L) + L / (self.b * np.sqrt(lam)) * ( + d * np.log(2) + + np.log(1.0 / delta) + + 0.5 * torch.slogdet(V * lam + torch.eye(size))[1] + ) elif type == "adaptive-AB": V = self.information_matrix() - beta = 2*np.log(1./delta) + (torch.logdet(V+H) - torch.logdet(H)) + lam*B + beta = ( + 2 * np.log(1.0 / delta) + + (torch.logdet(V + H) - torch.logdet(H)) + + lam * B + ) else: raise NotImplementedError("given confidence sets are not implemented.") - return beta \ No newline at end of file + return beta diff --git a/stpy/probability/likelihood.py b/stpy/probability/likelihood.py index c950cc0..586f9ba 100644 --- a/stpy/probability/likelihood.py +++ b/stpy/probability/likelihood.py @@ -3,6 +3,7 @@ import numpy as np import torch + class Likelihood(ABC): def __init__(self): @@ -14,7 +15,7 @@ def evaluate_log(self, f): pass @abstractmethod - def scale(self, err = None, bound = None): + def scale(self, err=None, bound=None): return @abstractmethod @@ -22,7 +23,7 @@ def normalization(self, d): return @abstractmethod - def evaluate_datapoint(self, f, d, mask = None): + def evaluate_datapoint(self, f, d, mask=None): pass @abstractmethod @@ -33,20 +34,18 @@ def get_confidence_set_cvxpy(self, theta, type, params, delta): def information_matrix(self, theta_fit): pass - @abstractmethod - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): pass @abstractmethod def get_objective_torch(self): pass - def add_data_point(self, d): - x,y = d - self.x = torch.vstack(self.x,x) - self.y = torch.vstack(self.y,y) + x, y = d + self.x = torch.vstack(self.x, x) + self.y = torch.vstack(self.y, y) self.fitted = False def load_data(self, D): @@ -60,18 +59,18 @@ def confidence_parameter_likelihood_ratio(self, delta, params): :param 
params: :return: """ - evidence = params['evidence'] - estimators = params['estimator_sequence'] + evidence = params["evidence"] + estimators = params["estimator_sequence"] - val = 0. - for i in range(len(estimators)-1): + val = 0.0 + for i in range(len(estimators) - 1): ev = evidence[i] est = estimators[i] if est is not None: - xx = self.x[i,:].view(1,-1) - yy = self.y[i,:].view(1,-1) - val += self.evaluate_datapoint(est, (xx, yy), mask = ev) - val = np.log(1/delta) + val + xx = self.x[i, :].view(1, -1) + yy = self.y[i, :].view(1, -1) + val += self.evaluate_datapoint(est, (xx, yy), mask=ev) + val = np.log(1 / delta) + val return val def lr_confidence_set_cvxpy(self, theta, beta, params): @@ -82,20 +81,24 @@ def lr_confidence_set_cvxpy(self, theta, beta, params): :param params: :return: """ - evidence = torch.Tensor(params['evidence']).bool() - self.set_fn = lambda theta: [self.get_objective_cvxpy(mask = evidence)(theta) <= beta] + evidence = torch.Tensor(params["evidence"]).bool() + self.set_fn = lambda theta: [ + self.get_objective_cvxpy(mask=evidence)(theta) <= beta + ] set = self.set_fn(theta) return set - - def confidence_parameter_prior_posterior(self, delta,params): - H = params['regularizer_hessian'] - sigma = params['sigma'] + def confidence_parameter_prior_posterior(self, delta, params): + H = params["regularizer_hessian"] + sigma = params["sigma"] n = self.x.size()[0] - K = (self.x@self.x.T + torch.max(H)*sigma**2*torch.eye(n)) - evidence_of_the_data = -0.5*self.y.T@torch.linalg.solve(K,self.y)-0.5*torch.linalg.slogdet(K)[1]#-(n/2)*np.log(2*np.pi) ## remove this as in likelihood not added - evidence_of_the_data = evidence_of_the_data #- np.log(2*np.pi*sigma**2) - return np.log(1./delta) - evidence_of_the_data + K = self.x @ self.x.T + torch.max(H) * sigma**2 * torch.eye(n) + evidence_of_the_data = ( + -0.5 * self.y.T @ torch.linalg.solve(K, self.y) + - 0.5 * torch.linalg.slogdet(K)[1] + ) # -(n/2)*np.log(2*np.pi) ## remove this as in likelihood not added + evidence_of_the_data = evidence_of_the_data # - np.log(2*np.pi*sigma**2) + return np.log(1.0 / delta) - evidence_of_the_data def prior_posterior_lr_confidence_set_cvxpy(self, theta, beta, params): """ @@ -106,11 +109,11 @@ def prior_posterior_lr_confidence_set_cvxpy(self, theta, beta, params): :return: """ # create a Gaussian likelihood - sigma = params['sigma'] - def gauss_likelihood(theta): return cp.sum_squares(self.x @ theta - self.y) / (2 * sigma ** 2) - self.set_fn = lambda theta: [gauss_likelihood(theta)<= beta] - set = self.set_fn(theta) - return set - + sigma = params["sigma"] + def gauss_likelihood(theta): + return cp.sum_squares(self.x @ theta - self.y) / (2 * sigma**2) + self.set_fn = lambda theta: [gauss_likelihood(theta) <= beta] + set = self.set_fn(theta) + return set diff --git a/stpy/probability/noise_models.py b/stpy/probability/noise_models.py index d736646..19db7a8 100644 --- a/stpy/probability/noise_models.py +++ b/stpy/probability/noise_models.py @@ -9,366 +9,421 @@ class NoiseModel(ABC): - """ - Class provides an interface to sample noise observations and evaluate their likelihood - """ - def __init__(self): - pass - - @abstractmethod - def sample(self, xs, theta): - pass - - @abstractmethod - def sample_noise(self, xs): - pass - - def joint_log_likelihood(self, ys, xs, theta: Union[np.array, cp.Variable]) -> Union[np.array, cp.Expression]: - """ Returns the sum of the lls, i.e. 
the joint ll""" - if isinstance(theta, cp.Variable): - return cp.sum(self.log_likelihood(ys, xs, theta)) - else: - return np.sum(self.log_likelihood(ys, xs, theta)) - - - - def get_mosek_params(self, threads=4): - if self.convex: - return { - mosek.iparam.num_threads: threads, - mosek.iparam.intpnt_solve_form: mosek.solveform.primal, - mosek.dparam.intpnt_co_tol_pfeas: 1e-4, - mosek.dparam.intpnt_co_tol_dfeas: 1e-4, - mosek.dparam.intpnt_co_tol_rel_gap: 1e-4 - } - else: - raise AttributeError("Fetching mosek parameters disallowed for non-convex problems") - - @abstractmethod - def convex(self) -> bool: - pass + """ + Class provides an interface to sample noise observations and evaluate their likelihood + """ + + def __init__(self): + pass + + @abstractmethod + def sample(self, xs, theta): + pass + + @abstractmethod + def sample_noise(self, xs): + pass + + def joint_log_likelihood( + self, ys, xs, theta: Union[np.array, cp.Variable] + ) -> Union[np.array, cp.Expression]: + """Returns the sum of the lls, i.e. the joint ll""" + if isinstance(theta, cp.Variable): + return cp.sum(self.log_likelihood(ys, xs, theta)) + else: + return np.sum(self.log_likelihood(ys, xs, theta)) + + def get_mosek_params(self, threads=4): + if self.convex: + return { + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.primal, + mosek.dparam.intpnt_co_tol_pfeas: 1e-4, + mosek.dparam.intpnt_co_tol_dfeas: 1e-4, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, + } + else: + raise AttributeError( + "Fetching mosek parameters disallowed for non-convex problems" + ) + + @abstractmethod + def convex(self) -> bool: + pass class AdditiveHomoscedasticNoiseModel(NoiseModel): - """ - Assume a linear model. Only thing left to implement is the eta log-likelihood in both cvxpy and numpy + """ + Assume a linear model. Only thing left to implement is the eta log-likelihood in both cvxpy and numpy - TODO discuss whether xs @ theta should be replaced by a f_noiseless type function you can pass at initialization? - """ - @abstractmethod - def sample_noise(self, xs): - """ pass xs in order to know how large noise should be. Also able to deal with heteroscedastic later on """ - pass + TODO discuss whether xs @ theta should be replaced by a f_noiseless type function you can pass at initialization? + """ - def sample(self, xs, theta): - return xs @ theta + self.sample_noise(xs) + @abstractmethod + def sample_noise(self, xs): + """pass xs in order to know how large noise should be. Also able to deal with heteroscedastic later on""" + pass - def log_likelihood(self, ys, xs, theta): # TODO change base class - if ys.shape[0] == 0: - return 0. 
# this is to avoid problems with cvxpy variables of size 0, which it doesn't like - if isinstance(theta, cp.Variable): - return self.cvxpy_noise_log_likelihood(ys - (xs @ theta)) - else: - return self.noise_log_likelihood(ys - (xs @ theta)) + def sample(self, xs, theta): + return xs @ theta + self.sample_noise(xs) + def log_likelihood(self, ys, xs, theta): # TODO change base class + if ys.shape[0] == 0: + return 0.0 # this is to avoid problems with cvxpy variables of size 0, which it doesn't like + if isinstance(theta, cp.Variable): + return self.cvxpy_noise_log_likelihood(ys - (xs @ theta)) + else: + return self.noise_log_likelihood(ys - (xs @ theta)) class PoissonNoise(NoiseModel): - def __init__(self, lam): - self.lam = lam + def __init__(self, lam): + self.lam = lam - def sample_noise(self, xs): - return torch.poisson(self.lam(xs).view(-1)).view(-1,1) - def convex(self) -> bool: - pass + def sample_noise(self, xs): + return torch.poisson(self.lam(xs).view(-1)).view(-1, 1) - def sample(self, xs, theta): - pass + def convex(self) -> bool: + pass - def mean(self, xs): - return self.lam(xs) -class GaussianNoise(AdditiveHomoscedasticNoiseModel): - def __init__(self, sigma=0.1): - """ - :param sigma: standard deviation - """ - super().__init__() - self.sigma = sigma + def sample(self, xs, theta): + pass - def sample_noise(self, xs): - return self.sigma*np.random.normal(scale=1.0, size=(xs.shape[0], 1)) + def mean(self, xs): + return self.lam(xs) - def noise_log_likelihood(self, etas, xs=None): - return -(0.5*((etas) ** 2))/(self.sigma ** 2) - 0.5*np.log(2*np.pi*(self.sigma**2)) - def cvxpy_noise_log_likelihood(self, etas, xs=None): - return -0.5 * cp.square(etas) / (self.sigma ** 2) - 0.5*np.log(2 * np.pi * self.sigma ** 2) +class GaussianNoise(AdditiveHomoscedasticNoiseModel): + def __init__(self, sigma=0.1): + """ + :param sigma: standard deviation + """ + super().__init__() + self.sigma = sigma - @property - def convex(self) -> bool: - return True + def sample_noise(self, xs): + return self.sigma * np.random.normal(scale=1.0, size=(xs.shape[0], 1)) - def __str__(self): - return "GaussianAdditive" + def noise_log_likelihood(self, etas, xs=None): + return -(0.5 * ((etas) ** 2)) / (self.sigma**2) - 0.5 * np.log( + 2 * np.pi * (self.sigma**2) + ) + def cvxpy_noise_log_likelihood(self, etas, xs=None): + return -0.5 * cp.square(etas) / (self.sigma**2) - 0.5 * np.log( + 2 * np.pi * self.sigma**2 + ) + @property + def convex(self) -> bool: + return True -class HuberNoise(AdditiveHomoscedasticNoiseModel): - def __init__(self, sigma=0.1): - """ - :param sigma: standard deviation - """ - super().__init__() - self.sigma = sigma + def __str__(self): + return "GaussianAdditive" - def sample_noise(self, xs): - return self.sigma*(np.random.normal(scale=1.0, size=(xs.shape[0], 1)) + np.random.laplace(scale=self.sigma, size=(xs.shape[0], 1)))/2. 
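A minimal usage sketch (made-up data) of the noise-model interface reformatted in this file: joint_log_likelihood accepts both a numpy parameter, where it returns a number, and a cvxpy variable, where it returns an expression that can be maximized. GaussianNoise is the class defined in this module; the data and shapes below are illustrative only.

import numpy as np
import cvxpy as cp
from stpy.probability.noise_models import GaussianNoise

noise = GaussianNoise(sigma=0.1)
xs = np.random.randn(10, 2)
theta_true = np.array([[1.0], [-0.5]])
ys = noise.sample(xs, theta_true)                      # linear model plus Gaussian noise

print(noise.joint_log_likelihood(ys, xs, theta_true))  # plain number

theta = cp.Variable((2, 1))
cp.Problem(cp.Maximize(noise.joint_log_likelihood(ys, xs, theta))).solve()
print(theta.value)                                     # maximum-likelihood estimate
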
- @property - def convex(self) -> bool: - return True +class HuberNoise(AdditiveHomoscedasticNoiseModel): + def __init__(self, sigma=0.1): + """ + :param sigma: standard deviation + """ + super().__init__() + self.sigma = sigma + + def sample_noise(self, xs): + return ( + self.sigma + * ( + np.random.normal(scale=1.0, size=(xs.shape[0], 1)) + + np.random.laplace(scale=self.sigma, size=(xs.shape[0], 1)) + ) + / 2.0 + ) + + @property + def convex(self) -> bool: + return True + + def __str__(self): + return "GaussianAdditive" - def __str__(self): - return "GaussianAdditive" class AdditiveBoundedNoise(GaussianNoise): - """ Sub-Gaussian bounded norm, with a Gaussian Likelihood""" - def __init__(self, lower, upper): - super().__init__(upper-lower) - self.lower = lower - self.upper = upper + """Sub-Gaussian bounded norm, with a Gaussian Likelihood""" + + def __init__(self, lower, upper): + super().__init__(upper - lower) + self.lower = lower + self.upper = upper - def sample_noise(self, xs): - raw = np.random.random_sample(size=(xs.shape[0], 1)) - rescaled = self.lower + raw * self.sigma - print(rescaled) - return rescaled # sigma is the length of the interval + def sample_noise(self, xs): + raw = np.random.random_sample(size=(xs.shape[0], 1)) + rescaled = self.lower + raw * self.sigma + print(rescaled) + return rescaled # sigma is the length of the interval - def __str__(self): - return "BoundedNoiseAdditive" + def __str__(self): + return "BoundedNoiseAdditive" class MisspecifiedAdditiveGaussianNoise(GaussianNoise): - def __init__(self, sigma=1.0, actual_sigma=0.1): - """ - :param sigma: standard deviation - """ - super().__init__(sigma=sigma) - self.actual_sigma = actual_sigma + def __init__(self, sigma=1.0, actual_sigma=0.1): + """ + :param sigma: standard deviation + """ + super().__init__(sigma=sigma) + self.actual_sigma = actual_sigma - def sample_noise(self, xs): - return self.actual_sigma*np.random.normal(scale=1.0, size=(xs.shape[0], 1)) + def sample_noise(self, xs): + return self.actual_sigma * np.random.normal(scale=1.0, size=(xs.shape[0], 1)) - def __str__(self): - return "MisspecifiedGaussianAdditive" + def __str__(self): + return "MisspecifiedGaussianAdditive" class LaplaceNoise(GaussianNoise): - def __init__(self, b): - """ - :param sigma: this is sometimes also denoted as b - """ - super().__init__() - self.b = b + def __init__(self, b): + """ + :param sigma: this is sometimes also denoted as b + """ + super().__init__() + self.b = b - def noise_log_likelihood(self, etas): - return -np.log(2*self.b) - np.abs(etas)/self.b + def noise_log_likelihood(self, etas): + return -np.log(2 * self.b) - np.abs(etas) / self.b - def cvxpy_noise_log_likelihood(self, etas): - return -np.log(2*self.b) - cp.abs(etas)/self.b + def cvxpy_noise_log_likelihood(self, etas): + return -np.log(2 * self.b) - cp.abs(etas) / self.b - def sample_noise(self, xs): - return np.random.laplace(loc = 0, scale=self.b, size=(xs.shape[0], 1)) + def sample_noise(self, xs): + return np.random.laplace(loc=0, scale=self.b, size=(xs.shape[0], 1)) - def __str__(self): - return "Laplace" + def __str__(self): + return "Laplace" - @property - def convex(self) -> bool: - return True + @property + def convex(self) -> bool: + return True class AdditiveGumbelNoise(AdditiveHomoscedasticNoiseModel): - def __init__(self, beta, mu): - super().__init__() - self.beta = beta - self.mu = mu + def __init__(self, beta, mu): + super().__init__() + self.beta = beta + self.mu = mu - def sample_noise(self, xs): - return 
np.random.gumbel(loc=self.mu, scale=self.beta, size=(xs.shape[0],)) + def sample_noise(self, xs): + return np.random.gumbel(loc=self.mu, scale=self.beta, size=(xs.shape[0],)) - def noise_log_likelihood(self, etas): - return -np.log(self.beta) - 1/self.beta*(etas - self.mu) - np.exp(-1/self.beta*(etas-self.mu)) + def noise_log_likelihood(self, etas): + return ( + -np.log(self.beta) + - 1 / self.beta * (etas - self.mu) + - np.exp(-1 / self.beta * (etas - self.mu)) + ) - def cvxpy_noise_log_likelihood(self, etas): - return -np.log(self.beta) - 1/self.beta*(etas - self.mu) - cp.exp(-1/self.beta*(etas-self.mu)) + def cvxpy_noise_log_likelihood(self, etas): + return ( + -np.log(self.beta) + - 1 / self.beta * (etas - self.mu) + - cp.exp(-1 / self.beta * (etas - self.mu)) + ) - def __str__(self): - return "Gumbel" + def __str__(self): + return "Gumbel" + + @property + def convex(self) -> bool: + return True - @property - def convex(self) -> bool: - return True class AdditiveTwoSidedWeibullNoise(AdditiveHomoscedasticNoiseModel): - def __init__(self, scale, shape): - """ - :param scale: lambda - :param shape: k - """ - super().__init__() - self.scale = scale - self.shape = shape - - def noise_log_likelihood(self, etas): - etas = np.abs(etas) - return np.log(0.5*self.shape/self.scale) + (self.shape - 1)*np.log(etas/self.scale) - np.power(etas/self.scale, self.shape) - - def cvxpy_noise_log_likelihood(self, etas): - raise NotImplementedError("cvxpy makes no sense for non-convex sets") - - def sample_noise(self, xs): - signs = np.sign(np.random.normal(size=xs.shape[0])) - weibull = np.random.weibull(self.shape, size=xs.shape[0]) - return self.scale * signs * weibull - - def __str__(self): - return "TwoSidedWeibull" - - @property - def convex(self) -> bool: - return False + def __init__(self, scale, shape): + """ + :param scale: lambda + :param shape: k + """ + super().__init__() + self.scale = scale + self.shape = shape + + def noise_log_likelihood(self, etas): + etas = np.abs(etas) + return ( + np.log(0.5 * self.shape / self.scale) + + (self.shape - 1) * np.log(etas / self.scale) + - np.power(etas / self.scale, self.shape) + ) + + def cvxpy_noise_log_likelihood(self, etas): + raise NotImplementedError("cvxpy makes no sense for non-convex sets") + + def sample_noise(self, xs): + signs = np.sign(np.random.normal(size=xs.shape[0])) + weibull = np.random.weibull(self.shape, size=xs.shape[0]) + return self.scale * signs * weibull + + def __str__(self): + return "TwoSidedWeibull" + + @property + def convex(self) -> bool: + return False + class BernoulliNoise(NoiseModel): - def __init__(self, prob): - """ - :param scale: lambda - Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta - :param shape: p - """ - super().__init__() - self.prob = prob # lambda , $lambda^(1/a) to connect to sampling below + def __init__(self, prob): + """ + :param scale: lambda + Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta + :param shape: p + """ + super().__init__() + self.prob = prob # lambda , $lambda^(1/a) to connect to sampling below - def mean(self, xs): - return self.prob(xs) + def mean(self, xs): + return self.prob(xs) - def sample_noise(self, xs): - bernouli = torch.bernoulli(self.prob(xs).view(-1)) - return bernouli.view(-1,1) + def sample_noise(self, xs): + bernouli = torch.bernoulli(self.prob(xs).view(-1)) + return bernouli.view(-1, 1) - def convex(self): - pass + def convex(self): + pass - def sample(self, xs, theta): - pass + def sample(self, xs, 
theta): + pass - def log_likelihood(self, ys, xs, theta: Union[np.array, cp.Variable]) -> Union[np.array, cp.Expression]: - pass + def log_likelihood( + self, ys, xs, theta: Union[np.array, cp.Variable] + ) -> Union[np.array, cp.Expression]: + pass class LogWeibullNoise(NoiseModel): - def __init__(self, lam, p = 2, lam_form = lambda x, y: np.exp(x@y)): - """ - :param scale: lambda - Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta - :param shape: p - """ - super().__init__() - self.lam = lam # lambda , $lambda^(1/a) to connect to sampling below - self.p = p # - self.lam_form = lam_form - - def sample(self,xs,theta): - pass - - def log_likelihood(self, ys, xs, theta): - assert(xs is not None) - if isinstance(theta, cp.Variable): - return self.cvxpy_log_likelihood(ys, xs, theta) - else: - return self.noise_log_likelihood(ys, xs, theta) - - def noise_log_likelihood(self,ys, xs, theta): - return np.log(self.lam_form(xs, theta).reshape(-1)) + self.p*ys.reshape(-1) - np.exp(ys).reshape(-1)**self.p*self.lam_form(xs, theta).reshape(-1) - # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes - # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta - - def sample_noise(self, xs): - weibull = (self.lam(xs)**(1/self.p)).reshape(-1)*np.random.weibull(self.p, size=xs.shape[0]) - weibull = weibull.reshape(-1,1) - return np.log(weibull) - - def mean(self, xs): - return (np.log(self.lam(xs)) - np.euler_gamma)/self.p - - def cvxpy_log_likelihood(self, ys, xs, theta): - # This works only fi - return xs @ theta + self.p*ys - cp.multiply((np.exp(ys)**self.p).reshape(-1),cp.exp(xs@theta)) - - def __str__(self): - return "logWeibull" - - @property - def convex(self) -> bool: - return True - -class WeibullNoise(LogWeibullNoise): - - def noise_log_likelihood(self,ys, xs, theta): - return np.log(self.lam_form(xs, theta).reshape(-1)) + np.log(self.p * (ys.reshape(-1)**(self.p-1))) - self.lam_form(xs, theta).reshape(-1)*(ys.reshape(-1)**self.p) - # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes - # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta + def __init__(self, lam, p=2, lam_form=lambda x, y: np.exp(x @ y)): + """ + :param scale: lambda + Note lambda should work for both cvxpy and np parameter inputs and takes xs, theta + :param shape: p + """ + super().__init__() + self.lam = lam # lambda , $lambda^(1/a) to connect to sampling below + self.p = p # + self.lam_form = lam_form + + def sample(self, xs, theta): + pass + + def log_likelihood(self, ys, xs, theta): + assert xs is not None + if isinstance(theta, cp.Variable): + return self.cvxpy_log_likelihood(ys, xs, theta) + else: + return self.noise_log_likelihood(ys, xs, theta) + + def noise_log_likelihood(self, ys, xs, theta): + return ( + np.log(self.lam_form(xs, theta).reshape(-1)) + + self.p * ys.reshape(-1) + - np.exp(ys).reshape(-1) ** self.p * self.lam_form(xs, theta).reshape(-1) + ) + # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes + # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta + + def sample_noise(self, xs): + weibull = (self.lam(xs) ** (1 / self.p)).reshape(-1) * np.random.weibull( + self.p, size=xs.shape[0] + ) + weibull = weibull.reshape(-1, 1) + return np.log(weibull) + + def mean(self, xs): + return (np.log(self.lam(xs)) - np.euler_gamma) / self.p + + def 
cvxpy_log_likelihood(self, ys, xs, theta): + # This works only fi + return ( + xs @ theta + + self.p * ys + - cp.multiply((np.exp(ys) ** self.p).reshape(-1), cp.exp(xs @ theta)) + ) + + def __str__(self): + return "logWeibull" + + @property + def convex(self) -> bool: + return True - def noise_likelihood(self,ys, xs, theta): - return self.lam_form(xs, theta).reshape(-1)*(self.p * (ys.reshape(-1)**(self.p-1)))*np.exp(- self.lam_form(xs, theta).reshape(-1)*(ys.reshape(-1)**self.p)) - # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes - # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta - def sample_noise(self, xs): - convert_lambda = (1/self.lam(xs))**(1/self.p) - weibull = convert_lambda.view(-1)*np.random.weibull(self.p, size=xs.shape[0]) - weibull = weibull.reshape(-1,1) - return weibull - - def mode(self, xs): - convert_lambda = (1/self.lam(xs))**(1/self.p) - return convert_lambda*((((self.p-1)/self.p))**(1/self.p)) +class WeibullNoise(LogWeibullNoise): - def mean(self, xs): - convert_lambda = (1/self.lam(xs))**(1/self.p) - return convert_lambda*scipy.special.gamma(1. + 1./self.p) + def noise_log_likelihood(self, ys, xs, theta): + return ( + np.log(self.lam_form(xs, theta).reshape(-1)) + + np.log(self.p * (ys.reshape(-1) ** (self.p - 1))) + - self.lam_form(xs, theta).reshape(-1) * (ys.reshape(-1) ** self.p) + ) + # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes + # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta + + def noise_likelihood(self, ys, xs, theta): + return ( + self.lam_form(xs, theta).reshape(-1) + * (self.p * (ys.reshape(-1) ** (self.p - 1))) + * np.exp(-self.lam_form(xs, theta).reshape(-1) * (ys.reshape(-1) ** self.p)) + ) + # notice that lam(xs) = exp(\theta^\top xs) in common parametrization hence the loglikelihood becomes + # xs @ theta + p*y - np.exp(y)**p*np.exp(xs@\theta) # which is strongly convex in theta + + def sample_noise(self, xs): + convert_lambda = (1 / self.lam(xs)) ** (1 / self.p) + weibull = convert_lambda.view(-1) * np.random.weibull(self.p, size=xs.shape[0]) + weibull = weibull.reshape(-1, 1) + return weibull + + def mode(self, xs): + convert_lambda = (1 / self.lam(xs)) ** (1 / self.p) + return convert_lambda * ((((self.p - 1) / self.p)) ** (1 / self.p)) + + def mean(self, xs): + convert_lambda = (1 / self.lam(xs)) ** (1 / self.p) + return convert_lambda * scipy.special.gamma(1.0 + 1.0 / self.p) if __name__ == "__main__": - import matplotlib.pyplot as plt - - d = 2 - p = 2 - lam = lambda x: torch.exp(torch.sum(x, dim = 1)) - lam_form = lambda x,theta: torch.exp(x@theta) - - W = WeibullNoise(lam, p = p, lam_form=lam_form) - - tstar = torch.ones(size = (2,1)).double() - x = torch.ones(size = (1,2)).double() - print(lam(x), lam_form(x,tstar)) - pdf = lambda y: W.noise_likelihood(y,x,tstar)#torch.exp(W.noise_log_likelihood(y,x,tstar)) - - y = torch.linspace(0,5,1000).double() - #plt.plot(y, pdf(y)) - samples = [] - mean = float(np.log(lam(x))) - for _ in range(10000): - samples.append(-np.log(float(W.sample_noise(x).view(-1)))*p - np.euler_gamma - mean) - - print (np.mean(samples)) - print( (np.pi**2/6)) - print (np.var(samples)) - #plt.plot(np.exp(W.mode(x)),pdf(W.mode(x)),'ko') - - plt.hist(samples, density=True) - plt.show() - - + import matplotlib.pyplot as plt + + d = 2 + p = 2 + lam = lambda x: torch.exp(torch.sum(x, dim=1)) + lam_form = lambda x, theta: torch.exp(x @ theta) + 
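    # A note on the parametrization exercised below: with
    # lam_form(x, theta) = exp(x @ theta), WeibullNoise uses the density
    # p(y) = lam * p * y**(p - 1) * exp(-lam * y**p), i.e. a Weibull with
    # shape p and scale lam**(-1 / p), whose mean lam**(-1 / p) * Gamma(1 + 1 / p)
    # is what WeibullNoise.mean returns. If W follows this law, then
    # -p * log(W) - log(lam) is standard Gumbel, so after subtracting
    # np.euler_gamma the samples collected below should have mean close to 0
    # and variance close to pi**2 / 6, the two quantities printed at the end.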
+ W = WeibullNoise(lam, p=p, lam_form=lam_form) + + tstar = torch.ones(size=(2, 1)).double() + x = torch.ones(size=(1, 2)).double() + print(lam(x), lam_form(x, tstar)) + pdf = lambda y: W.noise_likelihood( + y, x, tstar + ) # torch.exp(W.noise_log_likelihood(y,x,tstar)) + + y = torch.linspace(0, 5, 1000).double() + # plt.plot(y, pdf(y)) + samples = [] + mean = float(np.log(lam(x))) + for _ in range(10000): + samples.append( + -np.log(float(W.sample_noise(x).view(-1))) * p - np.euler_gamma - mean + ) + + print(np.mean(samples)) + print((np.pi**2 / 6)) + print(np.var(samples)) + # plt.plot(np.exp(W.mode(x)),pdf(W.mode(x)),'ko') + + plt.hist(samples, density=True) + plt.show() diff --git a/stpy/probability/poisson_likelihood.py b/stpy/probability/poisson_likelihood.py index d588fb7..af110a0 100644 --- a/stpy/probability/poisson_likelihood.py +++ b/stpy/probability/poisson_likelihood.py @@ -6,6 +6,7 @@ from stpy.probability.gaussian_likelihood import GaussianLikelihood import scipy + class PoissonLikelihoodCanonical(GaussianLikelihood): def __init__(self): @@ -13,66 +14,80 @@ def __init__(self): def evaluate_datapoint(self, theta, d, mask): if mask is None: - mask = 1. + mask = 1.0 x, y = d - r = -y*x@theta + torch.exp(x@theta) + r = -y * x @ theta + torch.exp(x @ theta) r = r * mask return r def link(self, s): return torch.exp(s) - def scale(self, err = None, bound = None): + def scale(self, err=None, bound=None): return np.exp(bound) - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): if mask is None: + def likelihood(theta): - return -self.y.T@(self.x @ theta) + cp.sum(cp.exp(self.x@theta)) + return -self.y.T @ (self.x @ theta) + cp.sum(cp.exp(self.x @ theta)) + else: + def likelihood(theta): - if torch.sum(mask.double())>1e-8: - return -(mask*self.y).T@(self.x @ theta) + mask.T @ cp.exp(self.x@theta) + if torch.sum(mask.double()) > 1e-8: + return -(mask * self.y).T @ (self.x @ theta) + mask.T @ cp.exp( + self.x @ theta + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) + return likelihood - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str, None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = params["estimate"] + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) - B = params['bound'] - d_eff = params['d_eff'] - bound = params['bound'] + B = params["bound"] + d_eff = params["d_eff"] + bound = params["bound"] if type == "LR": beta = self.confidence_parameter(delta, params, type=type) set = self.lr_confidence_set_cvxpy(theta, beta, params) - elif type in ['mutny']: + elif type in ["mutny"]: vars = np.exp(bound) - V = self.x.T @torch.diag(vars)@ self.x + H + V = self.x.T @ torch.diag(vars) @ self.x + H L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) - beta = 2.*np.log(1./delta) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + beta = 2.0 * np.log(1.0 / delta) + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) - elif type in ['laplace']: - vars = torch.exp(self.x@ theta_fit).view(-1) - V = self.x.T @torch.diag(vars) @ self.x + H + elif type in ["laplace"]: + vars = torch.exp(self.x @ theta_fit).view(-1) + V = self.x.T @ 
torch.diag(vars) @ self.x + H L = torch.from_numpy(scipy.linalg.sqrtm(V.numpy())) - beta = 2.*np.log(1./delta) - self.set_fn = lambda theta: [cp.sum_squares(L @ (theta - theta_fit)) <= beta] + beta = 2.0 * np.log(1.0 / delta) + self.set_fn = lambda theta: [ + cp.sum_squares(L @ (theta - theta_fit)) <= beta + ] set = self.set_fn(theta) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." + ) self.set = set self.fitted = True @@ -80,35 +95,43 @@ def get_confidence_set_cvxpy(self, return set def information_matrix(self): - V = self.x.T@self.x/self.sigma + V = self.x.T @ self.x / self.sigma return V - def confidence_parameter(self, delta, params, type = None): - H = params['regularizer_hessian'] + def confidence_parameter(self, delta, params, type=None): + H = params["regularizer_hessian"] lam = torch.max(torch.linalg.eigvalsh(H)) - B = params['bound'] - d_eff = params['d_eff'] + B = params["bound"] + d_eff = params["d_eff"] if type is None or type == "none" or type == "laplace": # this is a common heuristic - beta = 2.0 + beta = 2.0 elif type == "adaptive-AB": - sigma = 1./4. - V = self.x.T @ self.x / sigma ** 2 + H - beta = 2 * np.log(1. / delta) + (torch.logdet(V + H) - torch.logdet(H)) + lam * B + sigma = 1.0 / 4.0 + V = self.x.T @ self.x / sigma**2 + H + beta = ( + 2 * np.log(1.0 / delta) + + (torch.logdet(V + H) - torch.logdet(H)) + + lam * B + ) elif type == "LR": # this is based on sequential LR test beta = self.confidence_parameter_likelihood_ratio(delta, params) elif type == "Faubry": - H = params['regularizer_hessian'] - lam = H[0., 0] - theta_fit = params['estimate'] - D = torch.diag(1./(self.x @ theta_fit).view(-1)) + H = params["regularizer_hessian"] + lam = H[0.0, 0] + theta_fit = params["estimate"] + D = torch.diag(1.0 / (self.x @ theta_fit).view(-1)) V = self.x.T @ D @ self.x + H - beta = np.sqrt(lam)/2. 
+ 2./np.sqrt(lam)*(torch.logdet(V) - torch.logdet(H)) + 2/np.sqrt(lam)* np.log(1/delta)*d_eff + beta = ( + np.sqrt(lam) / 2.0 + + 2.0 / np.sqrt(lam) * (torch.logdet(V) - torch.logdet(H)) + + 2 / np.sqrt(lam) * np.log(1 / delta) * d_eff + ) else: raise NotImplementedError("Not implemented") return beta diff --git a/stpy/probability/robust_likelihood.py b/stpy/probability/robust_likelihood.py index 0cab487..8cc2dd7 100644 --- a/stpy/probability/robust_likelihood.py +++ b/stpy/probability/robust_likelihood.py @@ -4,9 +4,10 @@ from typing import Union, Dict, List from stpy.probability.likelihood import Likelihood + class RobustGraphicalLikelihood(Likelihood): - def __init__(self, coin, supp, sigma = 0.1): + def __init__(self, coin, supp, sigma=0.1): super().__init__() self.coin = coin self.supp = supp @@ -20,48 +21,68 @@ def evaluate_datapoint(self, theta, d): return torch.log(1 + torch.exp()) def add_data_point(self, d): - x,y = d - self.x = torch.vstack(self.x,x) - self.y = torch.vstack(self.y,y) + x, y = d + self.x = torch.vstack(self.x, x) + self.y = torch.vstack(self.y, y) self.fitted = False def load_data(self, D): self.x, self.y = D self.fitted = False - def get_objective_cvxpy(self, mask = None): + def get_objective_cvxpy(self, mask=None): if mask is None: if self.Sigma is None: - def likelihood(theta): return cp.sum(cp.abs(self.x@theta - self.y)/self.sigma) + + def likelihood(theta): + return cp.sum(cp.abs(self.x @ theta - self.y) / self.sigma) else: - def likelihood(theta): return cp.sum(cp.abs(torch.linalg.inv(self.Sigma)@(self.x@theta - self.y))) + + def likelihood(theta): + return cp.sum( + cp.abs(torch.linalg.inv(self.Sigma) @ (self.x @ theta - self.y)) + ) + else: if self.Sigma is None: + def likelihood(theta): - if torch.sum(mask.int())>0: - return cp.sum(cp.abs(self.x[mask,:]@theta - self.y[mask,:])/self.sigma) + if torch.sum(mask.int()) > 0: + return cp.sum( + cp.abs(self.x[mask, :] @ theta - self.y[mask, :]) + / self.sigma + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) else: + def likelihood(theta): - if torch.sum(mask.int())>0: - return cp.sum(cp.abs(torch.linalg.inv(self.Sigma)@(self.x[mask,:]@theta - self.y[mask,:]))) + if torch.sum(mask.int()) > 0: + return cp.sum( + cp.abs( + torch.linalg.inv(self.Sigma) + @ (self.x[mask, :] @ theta - self.y[mask, :]) + ) + ) else: - return cp.sum(theta*0) + return cp.sum(theta * 0) + return likelihood - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str, None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = params["estimate"] + H = params["regularizer_hessian"] beta = self.confidence_parameter(delta, params, type=type) @@ -78,13 +99,14 @@ def get_confidence_set_cvxpy(self, set = self.lr_confidence_set_cvxpy(theta, beta, params) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." 
+ ) self.set = set self.fitted = True return set - def get_objective_torch(self): raise NotImplementedError("Implement me please.") diff --git a/stpy/probability/weibul_likelihood.py b/stpy/probability/weibul_likelihood.py index 6d11179..77c509c 100644 --- a/stpy/probability/weibul_likelihood.py +++ b/stpy/probability/weibul_likelihood.py @@ -15,21 +15,19 @@ def __init__(self, p): def information_matrix(self, theta_fit): pass - def normalization(self, d): pass - - def evaluate_datapoint(self, theta, d, mask = None): + def evaluate_datapoint(self, theta, d, mask=None): if mask is None: - mask = 1. + mask = 1.0 x, y = d lam = torch.exp(x @ theta) l = -torch.log(lam) + (y ** (self.p)) * lam l = l * mask return l - def scale(self, err = None, bound = None): + def scale(self, err=None, bound=None): return np.exp(bound) def add_data_point(self, d): @@ -50,33 +48,44 @@ def get_objective_torch(self): def get_objective_cvxpy(self, mask=None): if mask is None: + def likelihood(theta): - return -cp.sum(self.x@theta) + cp.sum(cp.diag(self.y**(self.p))@cp.exp(self.x @ theta)) + return -cp.sum(self.x @ theta) + cp.sum( + cp.diag(self.y ** (self.p)) @ cp.exp(self.x @ theta) + ) + else: + def likelihood(theta): - if torch.sum(mask.int())>0: - return - cp.sum(self.x[mask,:] @ theta) + cp.sum(cp.diag(self.y[mask,:]**(self.p))@cp.exp(self.x[mask,:] @ theta)) + if torch.sum(mask.int()) > 0: + return -cp.sum(self.x[mask, :] @ theta) + cp.sum( + cp.diag(self.y[mask, :] ** (self.p)) + @ cp.exp(self.x[mask, :] @ theta) + ) else: return cp.sum(theta * 0) + return likelihood - def get_confidence_set_cvxpy(self, - theta: cp.Variable, - type: Union[str, None] = None, - params: Dict = {}, - delta: float = 0.1): + def get_confidence_set_cvxpy( + self, + theta: cp.Variable, + type: Union[str, None] = None, + params: Dict = {}, + delta: float = 0.1, + ): if self.fitted == True: return self.set_fn(theta) - theta_fit = params['estimate'] - H = params['regularizer_hessian'] + theta_fit = params["estimate"] + H = params["regularizer_hessian"] beta = self.confidence_parameter(delta, params, type=type) if type in ["laplace"]: V = self.information_matrix(theta_fit) if H is not None: - V += H + V += H self.set_fn = lambda theta: [cp.quad_form(theta - theta_fit, V) <= beta] set = self.set_fn(theta) @@ -84,18 +93,22 @@ def get_confidence_set_cvxpy(self, set = self.lr_confidence_set_cvxpy(theta, beta, params) else: - raise NotImplementedError("The desired confidence set type is not supported.") + raise NotImplementedError( + "The desired confidence set type is not supported." + ) self.set = set self.fitted = True return set - def confidence_parameter(self, delta, params, type = None): + def confidence_parameter(self, delta, params, type=None): if type == "LR": # this is based on sequential LR test beta = self.confidence_parameter_likelihood_ratio(delta, params) elif type == "laplace": - beta = 2. + beta = 2.0 else: - raise NotImplementedError("The desired confidence set type is not supported.") - return beta \ No newline at end of file + raise NotImplementedError( + "The desired confidence set type is not supported." 
+ ) + return beta diff --git a/stpy/random_process.py b/stpy/random_process.py index 85aa85a..9839a28 100755 --- a/stpy/random_process.py +++ b/stpy/random_process.py @@ -3,335 +3,546 @@ import matplotlib.pyplot as plt import matplotlib + class RandomProcess: - def visualize_function(self,xtest,f_trues, filename = None, colors = None): - from mpl_toolkits.mplot3d import axes3d, Axes3D - d = xtest.size()[1] - if d == 1: - if isinstance(f_trues, list): - for f_true in f_trues: - plt.plot(xtest,f_true(xtest)) - else: - plt.plot(xtest, f_trues(xtest)) - elif d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15, 7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - if isinstance(f_trues, list): - for index, f_true in enumerate(f_trues): - grid_z = griddata((xx, yy), f_true(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - if colors is not None: - color = colors[index] - ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color = color) - else: - grid_z = griddata((xx, yy), f_trues(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4) - - if filename is not None: - plt.xticks(fontsize=20, rotation=0) - plt.yticks(fontsize=20, rotation=0) - plt.savefig(filename, dpi = 300) - - - - - def visualize_function_contour(self, xtest, f_true, filename = None, levels = 10, figsize = (15, 7)): - from mpl_toolkits.mplot3d import axes3d, Axes3D - d = xtest.size()[1] - if d ==1: - pass - elif d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=figsize) - cs = ax.contourf(grid_x, grid_y, grid_z_f,levels= levels) - ax.contour(cs, colors='k') - cbar = fig.colorbar(cs) - #if self.x is not None: - # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") - ax.grid(c='k', ls='-', alpha=0.1) - - if filename is not None: - plt.xticks(fontsize=24, rotation=0) - plt.yticks(fontsize=24, rotation=0) - plt.savefig(filename, dpi = 300) - #plt.show() - - def visualize(self,xtest,f_true = None, points = True, show = True, size = 2, - norm = 1, fig = True, sqrtbeta = 2, constrained = None, d = None, matheron_kernel=None): - from mpl_toolkits.mplot3d import axes3d, Axes3D - - [mu, std] = self.mean_std(xtest) - - if d is None: - d = self.d - - if d == 1: - if fig == True: - plt.figure(figsize=(15, 7)) - plt.clf() - if self.x is not None: - plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o") - if size > 0: - - if matheron_kernel is not None: - z = self.sample_matheron(xtest,matheron_kernel, size=size).numpy().T - else: - z = self.sample(xtest, size=size).numpy().T - - for z_arr,label in zip(z,['sample']+[None for _ in range(size-1)]): - plt.plot(xtest.view(-1).numpy(),z_arr, 'k--', lw = 2, label = label) - - plt.fill_between(xtest.numpy().flat, (mu - sqrtbeta * std).numpy().flat, (mu + sqrtbeta * std).numpy().flat,color="#dddddd") - if f_true is not None: - plt.plot(xtest.numpy(),f_true(xtest).numpy(),'b-',lw = 2, label = "truth") - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - #plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - if 
show == True: - plt.show() - - elif d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15,7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - if f_true is not None: - grid_z = griddata((xx, yy), f_true(xtest)[:,0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4, label = "truth") - if points == True and self.fit == True: - ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:,0].detach().numpy(), c='r', s=100, marker="o", depthshade=False) - if self.beta is not None: - beta = self.beta(norm = norm) - grid_z2 = griddata((xx, yy), (mu.detach()+beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2) - grid_z3 = griddata((xx, yy), (mu.detach()-beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2) - - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4) - #plt.title('Posterior mean prediction plus 2 st.deviation') - plt.show() - - else: - print("Visualization not implemented") - - def visualize_subopt(self,xtest,f_true = None, points = True, show = True, size = 2, norm = 1, fig = True, beta = 2): - from mpl_toolkits.mplot3d import axes3d, Axes3D - [mu, std] = self.mean_std(xtest) - - print ("Visualizing in: ", self.d, "dimensions...") - - if self.d == 1: - if fig == True: - plt.figure(figsize=(15, 7)) - plt.clf() - if self.x is not None: - plt.plot(self.x.detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o") - plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat,color="#dddddd") - if f_true is not None: - plt.plot(xtest.numpy(),f_true(xtest).numpy(),'b-',lw = 2, label = "truth") - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - - min = torch.max(mu - beta*std) - mask = (mu + beta*std < min) - v = torch.min(mu - beta * std).numpy()-1 - plt.plot(xtest.numpy()[mask], 0*xtest.numpy()[mask]+v,'ko', lw = 6,label = "Discarted Region") - - - - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - - if show == True: - plt.show() - - def visualize_slice(self,xtest,slice, show = True, eps = None, size = 1, beta = 2): - append = torch.ones(size = (xtest.size()[0],1), dtype=torch.float64)*slice - xtest2 = torch.cat((xtest,append), dim = 1) - - [mu, std] = self.mean_std(xtest2) - - plt.figure(figsize=(15, 7)) - plt.clf() - plt.plot(xtest.numpy(), self.sample(xtest, size=size).numpy(), 'k--', lw=2, label="sample") - print(std.size(), mu.size()) - if self.x is not None: - plt.plot(self.x[:,0].detach().numpy(), self.y.detach().numpy(), 'r+', ms=10, marker="o") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std).numpy().flat, (mu + 2 * std).numpy().flat, color="#dddddd") - plt.fill_between(xtest.numpy().flat, (mu + 2 * std).numpy().flat, (mu + 2 * std + 2*self.s).numpy().flat, color="#bbdefb") - plt.fill_between(xtest.numpy().flat, (mu - 2 * std - 2*self.s).numpy().flat, (mu - 2 * std).numpy().flat, color="#bbdefb") - - if eps is not None: - mask = (beta*std < eps) - v = torch.min(mu - beta * std - 
2*self.s).numpy() - plt.plot(xtest.numpy()[mask], 0*xtest.numpy()[mask]+v,'k', lw = 6,label = "$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace") - - plt.plot(xtest.numpy(), mu.numpy(), 'r-', lw=2, label="posterior mean") - plt.title('Posterior mean prediction plus 2 st.deviation') - plt.legend() - if show == True: - plt.show() - - - - def visualize_contour_with_gap(self,xtest,f_true = None, gap = None, show = False): - [mu, _] = self.mean_std(xtest) - - if self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu) - ax.contour(cs, colors='k') - - ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'ro', ms=10) - cbar = fig.colorbar(cs) - - ax.grid(c='k', ls='-', alpha=0.1) - - if f_true is not None: - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_f) - ax.contour(cs, colors='k') - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - if show == True: - plt.show() - - def visualize_contour(self,xtest,f_true = None, show = True, points = True, ms = 5, levels = 20): - [mu, _] = self.mean_std(xtest) - - if self.d == 2: - from scipy.interpolate import griddata - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu) - ax.contour(cs, colors='k') - if points == True: - ax.plot(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), 'wo', ms=ms, alpha = 0.5) - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - - if f_true is not None: - f = f_true(xtest) - grid_z_f = griddata((xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_f, levels = levels) - ax.contour(cs, colors='k') - cbar = fig.colorbar(cs) - ax.grid(c='k', ls='-', alpha=0.1) - if show == True: - plt.show() - return ax - - def visualize_quiver(self,xtest, size = 2,norm = 1): - from mpl_toolkits.mplot3d import axes3d, Axes3D - [mu, std] = self.mean_std(xtest) - if self.d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15,7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].detach().numpy() - yy = xtest[:, 1].detach().numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - # - - ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), self.y[:,0].detach().numpy(), c='r', s=100, marker="o", depthshade=False) - - if self.beta is not None: - beta = self.beta(norm = norm) - grid_z2 = griddata((xx, yy), (mu.detach()+beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z2, color='gray', alpha=0.2) - grid_z3 = griddata((xx, yy), (mu.detach()-beta*std.detach())[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - 
ax.plot_surface(grid_x, grid_y, grid_z3, color='gray', alpha=0.2) - - ax.plot_surface(grid_x, grid_y, grid_z_mu, color='r', alpha=0.4) - plt.title('Posterior mean prediction plus 2 st.deviation') - - - derivatives = torch.zeros(xtest.size()[0],2) - for index,point in enumerate(xtest): - derivatives[index,:] = self.mean_gradient_hessian(point.view(-1,2)) - print (derivatives[index,:] ) - - print (derivatives.size()) - - - grid_der_x_mu = griddata((xx, yy), derivatives[:, 0].detach().numpy(), (grid_x, grid_y), method='linear') - grid_der_y_mu = griddata((xx, yy), derivatives[:, 1].detach().numpy(), (grid_x, grid_y), method='linear') - - fig, ax = plt.subplots(figsize=(15, 7)) - cs = ax.contourf(grid_x, grid_y, grid_z_mu) - - ax.contour(cs, colors='k') - - # Plot grid. - ax.grid(c='k', ls='-', alpha=0.1) - ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu) - - plt.show() - - else: - print("Visualization not implemented") + def visualize_function(self, xtest, f_trues, filename=None, colors=None): + from mpl_toolkits.mplot3d import axes3d, Axes3D + + d = xtest.size()[1] + if d == 1: + if isinstance(f_trues, list): + for f_true in f_trues: + plt.plot(xtest, f_true(xtest)) + else: + plt.plot(xtest, f_trues(xtest)) + elif d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + if isinstance(f_trues, list): + for index, f_true in enumerate(f_trues): + grid_z = griddata( + (xx, yy), + f_true(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + if colors is not None: + color = colors[index] + ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4, color=color) + else: + grid_z = griddata( + (xx, yy), + f_trues(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z, alpha=0.4) + + if filename is not None: + plt.xticks(fontsize=20, rotation=0) + plt.yticks(fontsize=20, rotation=0) + plt.savefig(filename, dpi=300) + + def visualize_function_contour( + self, xtest, f_true, filename=None, levels=10, figsize=(15, 7) + ): + from mpl_toolkits.mplot3d import axes3d, Axes3D + + d = xtest.size()[1] + if d == 1: + pass + elif d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), f[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + + fig, ax = plt.subplots(figsize=figsize) + cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels) + ax.contour(cs, colors="k") + cbar = fig.colorbar(cs) + # if self.x is not None: + # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") + ax.grid(c="k", ls="-", alpha=0.1) + + if filename is not None: + plt.xticks(fontsize=24, rotation=0) + plt.yticks(fontsize=24, rotation=0) + plt.savefig(filename, dpi=300) + # plt.show() + + def visualize( + self, + xtest, + f_true=None, + points=True, + show=True, + size=2, + norm=1, + fig=True, + sqrtbeta=2, + constrained=None, + d=None, + matheron_kernel=None, + ): + from mpl_toolkits.mplot3d import axes3d, Axes3D + + [mu, std] = self.mean_std(xtest) + + if d is None: + d = self.d + + if d == 1: + if fig == True: + plt.figure(figsize=(15, 7)) + plt.clf() + if self.x is not None: + plt.plot( + self.x.detach().numpy(), + 
self.y.detach().numpy(), + "r+", + ms=10, + marker="o", + ) + if size > 0: + + if matheron_kernel is not None: + z = ( + self.sample_matheron(xtest, matheron_kernel, size=size) + .numpy() + .T + ) + else: + z = self.sample(xtest, size=size).numpy().T + + for z_arr, label in zip( + z, ["sample"] + [None for _ in range(size - 1)] + ): + plt.plot(xtest.view(-1).numpy(), z_arr, "k--", lw=2, label=label) + + plt.fill_between( + xtest.numpy().flat, + (mu - sqrtbeta * std).numpy().flat, + (mu + sqrtbeta * std).numpy().flat, + color="#dddddd", + ) + if f_true is not None: + plt.plot( + xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth" + ) + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + # plt.title('Posterior mean prediction plus 2 st.deviation') + plt.legend() + if show == True: + plt.show() + + elif d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + if f_true is not None: + grid_z = griddata( + (xx, yy), + f_true(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface( + grid_x, grid_y, grid_z, color="b", alpha=0.4, label="truth" + ) + if points == True and self.fit == True: + ax.scatter( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + self.y[:, 0].detach().numpy(), + c="r", + s=100, + marker="o", + depthshade=False, + ) + if self.beta is not None: + beta = self.beta(norm=norm) + grid_z2 = griddata( + (xx, yy), + (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2) + grid_z3 = griddata( + (xx, yy), + (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2) + + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4) + # plt.title('Posterior mean prediction plus 2 st.deviation') + plt.show() + + else: + print("Visualization not implemented") + + def visualize_subopt( + self, + xtest, + f_true=None, + points=True, + show=True, + size=2, + norm=1, + fig=True, + beta=2, + ): + from mpl_toolkits.mplot3d import axes3d, Axes3D + + [mu, std] = self.mean_std(xtest) + + print("Visualizing in: ", self.d, "dimensions...") + + if self.d == 1: + if fig == True: + plt.figure(figsize=(15, 7)) + plt.clf() + if self.x is not None: + plt.plot( + self.x.detach().numpy(), + self.y.detach().numpy(), + "r+", + ms=10, + marker="o", + ) + plt.plot( + xtest.numpy(), + self.sample(xtest, size=size).numpy(), + "k--", + lw=2, + label="sample", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std).numpy().flat, + (mu + 2 * std).numpy().flat, + color="#dddddd", + ) + if f_true is not None: + plt.plot( + xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2, label="truth" + ) + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + + min = torch.max(mu - beta * std) + mask = mu + beta * std < min + v = torch.min(mu - beta * std).numpy() - 1 + plt.plot( + xtest.numpy()[mask], + 0 * xtest.numpy()[mask] + v, + "ko", + lw=6, + label="Discarted Region", + ) + + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.legend() + + if show == True: + plt.show() + + def 
visualize_slice(self, xtest, slice, show=True, eps=None, size=1, beta=2): + append = torch.ones(size=(xtest.size()[0], 1), dtype=torch.float64) * slice + xtest2 = torch.cat((xtest, append), dim=1) + + [mu, std] = self.mean_std(xtest2) + + plt.figure(figsize=(15, 7)) + plt.clf() + plt.plot( + xtest.numpy(), + self.sample(xtest, size=size).numpy(), + "k--", + lw=2, + label="sample", + ) + print(std.size(), mu.size()) + if self.x is not None: + plt.plot( + self.x[:, 0].detach().numpy(), + self.y.detach().numpy(), + "r+", + ms=10, + marker="o", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std).numpy().flat, + (mu + 2 * std).numpy().flat, + color="#dddddd", + ) + plt.fill_between( + xtest.numpy().flat, + (mu + 2 * std).numpy().flat, + (mu + 2 * std + 2 * self.s).numpy().flat, + color="#bbdefb", + ) + plt.fill_between( + xtest.numpy().flat, + (mu - 2 * std - 2 * self.s).numpy().flat, + (mu - 2 * std).numpy().flat, + color="#bbdefb", + ) + + if eps is not None: + mask = beta * std < eps + v = torch.min(mu - beta * std - 2 * self.s).numpy() + plt.plot( + xtest.numpy()[mask], + 0 * xtest.numpy()[mask] + v, + "k", + lw=6, + label="$\\mathcal{D}_E$ - $\\epsilon$ accurate domain in a subspace", + ) + + plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + plt.title("Posterior mean prediction plus 2 st.deviation") + plt.legend() + if show == True: + plt.show() + + def visualize_contour_with_gap(self, xtest, f_true=None, gap=None, show=False): + [mu, _] = self.mean_std(xtest) + + if self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + ax.contour(cs, colors="k") + + ax.plot( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + "ro", + ms=10, + ) + cbar = fig.colorbar(cs) + + ax.grid(c="k", ls="-", alpha=0.1) + + if f_true is not None: + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), + f[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_f) + ax.contour(cs, colors="k") + cbar = fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + if show == True: + plt.show() + + def visualize_contour( + self, xtest, f_true=None, show=True, points=True, ms=5, levels=20 + ): + [mu, _] = self.mean_std(xtest) + + if self.d == 2: + from scipy.interpolate import griddata + + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + ax.contour(cs, colors="k") + if points == True: + ax.plot( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + "wo", + ms=ms, + alpha=0.5, + ) + cbar = fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + + if f_true is not None: + f = f_true(xtest) + grid_z_f = griddata( + (xx, yy), + f[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=levels) + ax.contour(cs, colors="k") + cbar = 
fig.colorbar(cs) + ax.grid(c="k", ls="-", alpha=0.1) + if show == True: + plt.show() + return ax + + def visualize_quiver(self, xtest, size=2, norm=1): + from mpl_toolkits.mplot3d import axes3d, Axes3D + + [mu, std] = self.mean_std(xtest) + if self.d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].detach().numpy() + yy = xtest[:, 1].detach().numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z_mu = griddata( + (xx, yy), mu[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" + ) + # + + ax.scatter( + self.x[:, 0].detach().numpy(), + self.x[:, 1].detach().numpy(), + self.y[:, 0].detach().numpy(), + c="r", + s=100, + marker="o", + depthshade=False, + ) + + if self.beta is not None: + beta = self.beta(norm=norm) + grid_z2 = griddata( + (xx, yy), + (mu.detach() + beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z2, color="gray", alpha=0.2) + grid_z3 = griddata( + (xx, yy), + (mu.detach() - beta * std.detach())[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z3, color="gray", alpha=0.2) + + ax.plot_surface(grid_x, grid_y, grid_z_mu, color="r", alpha=0.4) + plt.title("Posterior mean prediction plus 2 st.deviation") + + derivatives = torch.zeros(xtest.size()[0], 2) + for index, point in enumerate(xtest): + derivatives[index, :] = self.mean_gradient_hessian(point.view(-1, 2)) + print(derivatives[index, :]) + + print(derivatives.size()) + + grid_der_x_mu = griddata( + (xx, yy), + derivatives[:, 0].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + grid_der_y_mu = griddata( + (xx, yy), + derivatives[:, 1].detach().numpy(), + (grid_x, grid_y), + method="linear", + ) + + fig, ax = plt.subplots(figsize=(15, 7)) + cs = ax.contourf(grid_x, grid_y, grid_z_mu) + + ax.contour(cs, colors="k") + + # Plot grid. 
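+            # Descriptive comment (added): the lines below draw the grid and then
+            # overlay the interpolated gradient field of the posterior mean as a
+            # quiver (arrow) plot on top of the contour plot of the mean.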
+ ax.grid(c="k", ls="-", alpha=0.1) + ax.quiver(grid_x, grid_y, grid_der_x_mu, grid_der_y_mu) + + plt.show() + + else: + print("Visualization not implemented") if __name__ == "__main__": - from stpy.continuous_processes.gauss_procc import GaussianProcess - from stpy.continuous_processes.fourier_fea import GaussianProcessFF - from stpy.continuous_processes.kernelized_features import KernelizedFeatures - from stpy.kernels import KernelFunction - from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding - import stpy - import torch - import matplotlib.pyplot as plt - import numpy as np - - n = 1024 - N = 256 - gamma = 0.09 - s = 0.1 - # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) - benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) - - x = benchmark.initial_guess(N, adv_inv=True) - y = benchmark.eval(x) - xtest = benchmark.interval(1024) - - # GP = GaussianProcess(gamma=gamma, s=s) - # GP.fit_gp(x, y) - # GP.visualize(xtest, show=False, size=5) - # plt.show() - - m = 64 - kernel = KernelFunction(gamma=gamma) - embedding = HermiteEmbedding(gamma=gamma, m=m) - RFF = KernelizedFeatures(embedding=embedding, s=s, m=m) - RFF.fit_gp(x, y) - RFF.visualize(xtest, fig = False, show=False, size=5, matheron_kernel = kernel) - plt.show() \ No newline at end of file + from stpy.continuous_processes.gauss_procc import GaussianProcess + from stpy.continuous_processes.fourier_fea import GaussianProcessFF + from stpy.continuous_processes.kernelized_features import KernelizedFeatures + from stpy.kernels import KernelFunction + from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding + import stpy + import torch + import matplotlib.pyplot as plt + import numpy as np + + n = 1024 + N = 256 + gamma = 0.09 + s = 0.1 + # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) + benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) + + x = benchmark.initial_guess(N, adv_inv=True) + y = benchmark.eval(x) + xtest = benchmark.interval(1024) + + # GP = GaussianProcess(gamma=gamma, s=s) + # GP.fit_gp(x, y) + # GP.visualize(xtest, show=False, size=5) + # plt.show() + + m = 64 + kernel = KernelFunction(gamma=gamma) + embedding = HermiteEmbedding(gamma=gamma, m=m) + RFF = KernelizedFeatures(embedding=embedding, s=s, m=m) + RFF.fit_gp(x, y) + RFF.visualize(xtest, fig=False, show=False, size=5, matheron_kernel=kernel) + plt.show() diff --git a/stpy/regularization/constraints.py b/stpy/regularization/constraints.py index 20a94df..77c945c 100644 --- a/stpy/regularization/constraints.py +++ b/stpy/regularization/constraints.py @@ -48,7 +48,7 @@ class AbsoluteValueConstraint(Constraints): def __init__(self, c=None): if c is None: - self.c = 1. + self.c = 1.0 else: self.c = c @@ -65,7 +65,7 @@ class QuadraticInequalityConstraint(Constraints): def __init__(self, Q, b=None, c=None): self.Q = Q if c is None: - self.c = 1. + self.c = 1.0 else: self.c = c if b is None: @@ -87,7 +87,6 @@ def __init__(self, q, c, d, groups): self.groups = groups self.convex = False - def get_list_cvxpy_constraints(self, theta): w = self.q / (1 - self.q) set_of_constraints = [] @@ -97,7 +96,7 @@ def get_list_cvxpy_constraints(self, theta): # l1 constraint constraints = [] weights = np.ones(d) * w - weights[i] = 1. 
+ weights[i] = 1.0 group = self.groups[i] constraints.append(cp.norm(theta[group]).T * weights[i] <= self.c) # l_infinity constraint @@ -114,6 +113,7 @@ def get_constraint_cvxpy(self, theta): ## Does not work for non-convex constraints return None + class NonConvexNormConstraint(Constraints): def __init__(self, q, c, d): @@ -132,7 +132,7 @@ def construct(self, q, d): polytope = copy.copy(square) zero = np.zeros(d).reshape(1, -1) appex = copy.copy(zero) - appex[0, i // 2] = (float(i % 2) - 0.5) * 2. + appex[0, i // 2] = (float(i % 2) - 0.5) * 2.0 polytope = np.concatenate((appex, polytope)) self.vertex_description.append(polytope) self.polyhedra_vertex_description.append(polytope) @@ -167,7 +167,7 @@ def get_list_cvxpy_constraints(self, theta): # l1 constraint constraints = [] weights = np.ones(self.d) * w - weights[i] = 1. + weights[i] = 1.0 constraints.append(cp.abs(theta).T @ weights <= self.c) # l_infinity constraint for j in range(self.d): diff --git a/stpy/regularization/regularizer.py b/stpy/regularization/regularizer.py index 21888f0..9aa91c7 100644 --- a/stpy/regularization/regularizer.py +++ b/stpy/regularization/regularizer.py @@ -8,7 +8,7 @@ class Regularizer(ABC): - def __init__(self, lam=1.): + def __init__(self, lam=1.0): self.lam = lam self.groups = None self.convex = True @@ -19,7 +19,9 @@ def eval(self, theta): @abstractmethod def get_regularizer_cvxpy(self): - def reg(theta): return 0 + def reg(theta): + return 0 + return reg def is_convex(self): @@ -28,8 +30,10 @@ def is_convex(self): def get_constraint_set_cvxpy(self, theta, c): return [self.get_regularizer_cvxpy()(theta) <= c] - def get_constraint_object(self,c): - return CustomConstraint(None, lambda theta: self.get_constraint_set_cvxpy(theta, c)) + def get_constraint_object(self, c): + return CustomConstraint( + None, lambda theta: self.get_constraint_set_cvxpy(theta, c) + ) def hessian(self, theta_fit): pass @@ -37,27 +41,30 @@ def hessian(self, theta_fit): class L2Regularizer(Regularizer): - def __init__(self, lam=1.): - super().__init__(lam = lam) + def __init__(self, lam=1.0): + super().__init__(lam=lam) def get_regularizer_cvxpy(self): - def reg(theta): return self.lam*cp.sum_squares(theta)/2. + def reg(theta): + return self.lam * cp.sum_squares(theta) / 2.0 + return reg def eval(self, theta): - return self.lam*torch.sum(theta**2)/2. + return self.lam * torch.sum(theta**2) / 2.0 def hessian(self, theta): - return self.lam * torch.eye(n = theta.size()[0]).double()/2. 
+ return self.lam * torch.eye(n=theta.size()[0]).double() / 2.0 + class NonConvexLqRegularizer(Regularizer): - def __init__(self, lam=1., q = 0.5): - super().__init__(lam = lam) + def __init__(self, lam=1.0, q=0.5): + super().__init__(lam=lam) self.q = q def eval(self, theta): - return self.lam*torch.sum(torch.abs(theta)**self.q) + return self.lam * torch.sum(torch.abs(theta) ** self.q) def hessian(self, theta): return None @@ -67,14 +74,16 @@ def is_convex(self): def get_regularizer_cvxpy(self, eta): def reg(theta): - norm = cp.sum_squares(theta/eta.reshape(-1,1)) - return self.q*0.5*norm*self.lam + norm = cp.sum_squares(theta / eta.reshape(-1, 1)) + return self.q * 0.5 * norm * self.lam + return reg + class GroupNonCovexLqRegularizer(NonConvexLqRegularizer): - def __init__(self, lam=1., q = 0.5, groups = None): - super().__init__(lam = lam) + def __init__(self, lam=1.0, q=0.5, groups=None): + super().__init__(lam=lam) self.q = q self.groups = groups @@ -82,43 +91,44 @@ def eval(self, theta): val = None for group in self.groups: if val is None: - val = torch.norm(theta[group])**self.q + val = torch.norm(theta[group]) ** self.q else: val += torch.norm(theta[group]) ** self.q - return self.lam*val + return self.lam * val def get_regularizer_cvxpy(self, eta): def reg(theta): val = None - for i,group in enumerate(self.groups): + for i, group in enumerate(self.groups): if val is None: - val = cp.sum_squares(theta[group])/eta[i].reshape(-1,1) + val = cp.sum_squares(theta[group]) / eta[i].reshape(-1, 1) else: - val += cp.sum_squares(theta[group])/eta[i].reshape(-1,1) - return val*self.lam + val += cp.sum_squares(theta[group]) / eta[i].reshape(-1, 1) + return val * self.lam + return reg class L1Regularizer(Regularizer): - def __init__(self, lam=1.): - super().__init__(lam = lam) + def __init__(self, lam=1.0): + super().__init__(lam=lam) def get_regularizer_cvxpy(self): def reg(theta): - return self.lam*cp.norm1(theta) + return self.lam * cp.norm1(theta) + return reg def eval(self, theta): - return self.lam*torch.sum(torch.abs(theta)) + return self.lam * torch.sum(torch.abs(theta)) def hessian(self, theta): - return self.lam * torch.eye(n = theta.size()[0]).double() - + return self.lam * torch.eye(n=theta.size()[0]).double() class GroupL1L2Regularizer(Regularizer): - def __init__(self, lam = 1., groups = None): + def __init__(self, lam=1.0, groups=None): self.groups = groups self.lam = lam pass @@ -137,14 +147,16 @@ def reg(theta): norm = cp.norm2(theta[group]) else: norm += cp.norm2(theta[group]) - return cp.square(norm)*self.lam + return cp.square(norm) * self.lam + return reg def hessian(self, theta): return None + class NestedGroupL1Regularizer(Regularizer): - def __init__(self, lam = 1., groups = None, weights = None): + def __init__(self, lam=1.0, groups=None, weights=None): self.groups = groups self.lam = lam self.weights = weights @@ -153,7 +165,7 @@ def __init__(self, lam = 1., groups = None, weights = None): def eval(self, theta): norm = 0 for i, group in enumerate(self.groups): - norm += self.weights[i]*torch.sum(torch.abs(theta[group])) + norm += self.weights[i] * torch.sum(torch.abs(theta[group])) return norm**2 * self.lam def get_regularizer_cvxpy(self): @@ -167,16 +179,17 @@ def reg(theta): else: norm += self.weights[i] * cp.norm1(theta[group]) - return norm*self.lam + return norm * self.lam return reg def hessian(self, theta): return None + class NestedGroupL1L2Regularizer(Regularizer): - def __init__(self, lam = 1., groups = None, weights = None): + def __init__(self, lam=1.0, 
groups=None, weights=None): self.groups = groups self.lam = lam self.weights = weights @@ -199,7 +212,7 @@ def reg(theta): else: norm += self.weights[i] * cp.norm2(theta[group]) - return cp.square(norm)*self.lam + return cp.square(norm) * self.lam return reg @@ -208,7 +221,7 @@ def hessian(self, theta): class NonConvexNormRegularizer(Regularizer): - def __init__(self, lam = 1., q = 1. , groups = None): + def __init__(self, lam=1.0, q=1.0, groups=None): self.groups = groups self.lam = lam self.q = q @@ -230,6 +243,6 @@ def reg(theta): else: norm += self.weights[i] * cp.norm2(theta[group]) - return cp.square(norm)*self.lam + return cp.square(norm) * self.lam return reg diff --git a/stpy/regularization/sdp_constraint.py b/stpy/regularization/sdp_constraint.py index cb8d080..0c3d460 100644 --- a/stpy/regularization/sdp_constraint.py +++ b/stpy/regularization/sdp_constraint.py @@ -2,21 +2,23 @@ from stpy.regularization.constraints import Constraints import cvxpy as cp + class SDPConstraint(Constraints): - def __init__(self, type="trace", rank=1.): + def __init__(self, type="trace", rank=1.0): super().__init__() self.trace_constraint = None self.lambda_max_constraint = None self.psd_constraint = "Yes" - self.matrix_bound = 1. + self.matrix_bound = 1.0 self.type = type self.rank = rank - self.custom_regularization= None + self.custom_regularization = None self.fit_params() + def fit_params(self): if self.type == "stable-rank": self.matrix_bound = self.rank @@ -24,12 +26,14 @@ def fit_params(self): def get_type(self): return self.type - def get_constraint_cvxpy(self,A,l,s_value): + def get_constraint_cvxpy(self, A, l, s_value): constraints = [] # add a classical psd constraint if self.matrix_bound is not None: - constraints+=[cp.trace(A) <= self.matrix_bound * l] + [cp.lambda_max(A) <= l] + constraints += [cp.trace(A) <= self.matrix_bound * l] + [ + cp.lambda_max(A) <= l + ] # trace regularization if self.trace_constraint is not None: @@ -37,13 +41,13 @@ def get_constraint_cvxpy(self,A,l,s_value): # restrict the max eigenvalue if s_value is not None: - constraints += [l<=s_value] + constraints += [l <= s_value] # lambda_max regularization if self.lambda_max_constraint is not None: constraints += [cp.lambda_max(A) <= self.lambda_max_constraint] if self.custom_regularization is not None: - constraints += [self.custom_regularization(A,l,s_value)] + constraints += [self.custom_regularization(A, l, s_value)] - return constraints \ No newline at end of file + return constraints diff --git a/stpy/regularization/simplex_regularizer.py b/stpy/regularization/simplex_regularizer.py index 1383812..e2b7a93 100644 --- a/stpy/regularization/simplex_regularizer.py +++ b/stpy/regularization/simplex_regularizer.py @@ -2,48 +2,57 @@ import cvxpy as cp import numpy as np import torch + + class ProbabilityRegularizer(Regularizer): def __init__(self, lam=1, w=None, d=1, **kwargs): super().__init__(lam) self.lam = lam if w is None: - self.w = torch.ones(d).double()/d + self.w = torch.ones(d).double() / d else: self.w = w self.convex = True self.dcp = True self.d = d self.name = "default" + + class SupRegularizer(ProbabilityRegularizer): - def __init__(self, constrained = False, version = '1',**kwargs): + def __init__(self, constrained=False, version="1", **kwargs): super().__init__(**kwargs) self.convex = False self.name = "sup" self.constrained = constrained self.version = version + def get_regularizer_cvxpy(self): pass def get_cvxpy_objectives_constraints_variables(self, d): if not self.constrained: - print (d, self.w 
) - objectives = [lambda x: cp.inv_pos(x[i])*self.lam/self.w[i] for i in range(d)] + print(d, self.w) + objectives = [ + lambda x: cp.inv_pos(x[i]) * self.lam / self.w[i] for i in range(d) + ] constriants = [lambda x: [] for i in range(d)] return objectives, constriants, [] - elif self.version == '1': - objectives = [lambda x: 0. for i in range(d)] - #constriants = [lambda x: [cp.inv_pos(x[i])<=1/self.lam]+[cp.max(x)<=x[i]] for i in range(d)] - constriants = [lambda x: [x[i] >= self.lam] for i in range(d)] + elif self.version == "1": + objectives = [lambda x: 0.0 for i in range(d)] + # constriants = [lambda x: [cp.inv_pos(x[i])<=1/self.lam]+[cp.max(x)<=x[i]] for i in range(d)] + constriants = [lambda x: [x[i] >= self.lam] for i in range(d)] return objectives, constriants, [] else: - objectives = [lambda x: 0.] + objectives = [lambda x: 0.0] I = np.eye(d) - constriants = [lambda x: [ I*self.lam*cp.sum(x) << d*cp.diag(x)]] + constriants = [lambda x: [I * self.lam * cp.sum(x) << d * cp.diag(x)]] return objectives, constriants, [] + def eval(self, theta): - return self.lam/torch.max(self.w*theta) + return self.lam / torch.max(self.w * theta) + class DirichletRegularizer(ProbabilityRegularizer): @@ -52,11 +61,12 @@ def __init__(self, **kwargs): self.name = "dirichlet" def get_regularizer_cvxpy(self): - return lambda x: cp.sum((self.w-1)@cp.log(x)) * self.lam + return lambda x: cp.sum((self.w - 1) @ cp.log(x)) * self.lam def eval(self, theta): return self.lam / torch.sum(torch.abs(theta)) + class WeightedAitchisonRegularizer(ProbabilityRegularizer): def __init__(self, **kwargs): @@ -64,21 +74,20 @@ def __init__(self, **kwargs): self.dcp = False self.name = "aitchison" - def get_regularizer_cvxpy(self): def reg(x): - # outer = sum([cp.log(x[j])*cp.log(x[i]) for i,j in zip(range(self.d),range(self.d)) if i!=j]) - return 2*self.lam*(cp.sum(cp.log(x)**2)) + # outer = sum([cp.log(x[j])*cp.log(x[i]) for i,j in zip(range(self.d),range(self.d)) if i!=j]) + return 2 * self.lam * (cp.sum(cp.log(x) ** 2)) return reg + def eval(self, theta): return self.lam / torch.sum(torch.abs(theta)) class L1MeasureRegularizer(ProbabilityRegularizer): def get_regularizer_cvxpy(self): - return lambda x: cp.norm1(x)*self.lam + return lambda x: cp.norm1(x) * self.lam def eval(self, theta): - return self.lam/torch.sum(torch.abs(theta)) - + return self.lam / torch.sum(torch.abs(theta)) diff --git a/stpy/sampling/hmc.py b/stpy/sampling/hmc.py index 879fd17..1e6ce13 100644 --- a/stpy/sampling/hmc.py +++ b/stpy/sampling/hmc.py @@ -1,5 +1,7 @@ -params_hmc = hamiltorch.sample(log_prob_func=log_prob_func, - params_init=params_init, - num_samples=num_samples, - step_size=step_size, - num_steps_per_sample=num_steps_per_sample) +params_hmc = hamiltorch.sample( + log_prob_func=log_prob_func, + params_init=params_init, + num_samples=num_samples, + step_size=step_size, + num_steps_per_sample=num_steps_per_sample, +) diff --git a/stpy/sampling/langevin.py b/stpy/sampling/langevin.py index c7255b7..1914ae5 100644 --- a/stpy/sampling/langevin.py +++ b/stpy/sampling/langevin.py @@ -2,25 +2,30 @@ import torch import scipy -class LangevinSampler(): - def __init__(self, verbose = False): - self.verbose = verbose - pass +class LangevinSampler: - def calculate(self, HessianF,theta0): - W = HessianF(theta0) - L = float(scipy.sparse.linalg.eigsh(W.numpy(), k=1, which='LM', return_eigenvectors=False, tol=1e-3)) - return L + def __init__(self, verbose=False): + self.verbose = verbose + pass - def sample(self, F, nablaF, HessianF, theta0, steps = 
100): - L = self.calculate(HessianF, theta0) - eta = 0.5 / (L + 1) - m = theta0.size()[0] - theta = theta0 - for k in range(steps): - w = torch.randn(size=(m, 1)).double() - theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w - if self.verbose == True: - print("Iter:", k, theta.T) - return theta \ No newline at end of file + def calculate(self, HessianF, theta0): + W = HessianF(theta0) + L = float( + scipy.sparse.linalg.eigsh( + W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + ) + ) + return L + + def sample(self, F, nablaF, HessianF, theta0, steps=100): + L = self.calculate(HessianF, theta0) + eta = 0.5 / (L + 1) + m = theta0.size()[0] + theta = theta0 + for k in range(steps): + w = torch.randn(size=(m, 1)).double() + theta = theta - eta * nablaF(theta) + np.sqrt(2 * eta) * w + if self.verbose == True: + print("Iter:", k, theta.T) + return theta diff --git a/stpy/sampling/proximal_langevin.py b/stpy/sampling/proximal_langevin.py index 2cf69c1..322ccab 100644 --- a/stpy/sampling/proximal_langevin.py +++ b/stpy/sampling/proximal_langevin.py @@ -2,20 +2,26 @@ import torch import numpy as np + def ProximalLangevin(LangevinSampler): - def sample(self, F, nablaF, HessianF, theta0, prox, steps = 100): - L = self.calculate(HessianF, theta0) - eta = 0.5 / (L + 1) - m = theta0.size()[0] - theta = theta0 - for k in range(steps): - w = torch.randn(size=(m, 1)).double() - theta = (1 - eta) * theta - eta * nablaF(theta) + eta * prox(theta) + np.sqrt(2 * eta) * w - if self.verbose == True: - print("Iter:", k, theta.T) - return prox(theta) + def sample(self, F, nablaF, HessianF, theta0, prox, steps=100): + L = self.calculate(HessianF, theta0) + eta = 0.5 / (L + 1) + m = theta0.size()[0] + theta = theta0 + for k in range(steps): + w = torch.randn(size=(m, 1)).double() + theta = ( + (1 - eta) * theta + - eta * nablaF(theta) + + eta * prox(theta) + + np.sqrt(2 * eta) * w + ) + if self.verbose == True: + print("Iter:", k, theta.T) + return prox(theta) def MirrorLangevin(LangvinSampler): - pass \ No newline at end of file + pass diff --git a/stpy/sampling/sampling_helper.py b/stpy/sampling/sampling_helper.py index 6cbef04..0024a3a 100644 --- a/stpy/sampling/sampling_helper.py +++ b/stpy/sampling/sampling_helper.py @@ -1,58 +1,56 @@ - - import torch import numpy as np import matplotlib.pyplot as plt -def get_increment(eta, steps, f, w0, path = False): - """ - :param eta: terminal time - :param steps: number of steps - :param f: the operator - :param w0: initial point - :return: - """ +def get_increment(eta, steps, f, w0, path=False): + """ - tau = eta/steps - w = w0 - sequence = [] + :param eta: terminal time + :param steps: number of steps + :param f: the operator + :param w0: initial point + :return: + """ - for i in range(steps): + tau = eta / steps + w = w0 + sequence = [] + for i in range(steps): - n = torch.randn(size = w0.size()).double() - w = w + np.sqrt(2*tau)*f(w,n) - if path: - sequence.append(w) + n = torch.randn(size=w0.size()).double() + w = w + np.sqrt(2 * tau) * f(w, n) + if path: + sequence.append(w) - if path: - return sequence - else: - return w + if path: + return sequence + else: + return w -if __name__ == "__main__": - f = lambda w: torch.diag(1./torch.abs(w.view(-1))) - d = 1 - w0 = torch.zeros(size = (d,1)).double() + 2 - step = 100 - path = get_increment(2, step, f, w0, path = True) - #plt.plot(path) - - i = 0 - colors = ['k','r','b','orange','brown','purple'] - for steps in [5,10,20,100,200,500]: - - repeats = 100 - ws = [] - for _ in range(repeats): - path = 
get_increment(2,steps,f,w0, path = True) - xtest = torch.linspace(0,2,steps) - plt.plot(xtest, path, color = colors[i]) - i = i + 1 - # plt.hist(np.array(ws), label = str(step)) - - plt.legend() - plt.show() +if __name__ == "__main__": + f = lambda w: torch.diag(1.0 / torch.abs(w.view(-1))) + d = 1 + w0 = torch.zeros(size=(d, 1)).double() + 2 + step = 100 + path = get_increment(2, step, f, w0, path=True) + # plt.plot(path) + + i = 0 + colors = ["k", "r", "b", "orange", "brown", "purple"] + for steps in [5, 10, 20, 100, 200, 500]: + + repeats = 100 + ws = [] + for _ in range(repeats): + path = get_increment(2, steps, f, w0, path=True) + xtest = torch.linspace(0, 2, steps) + plt.plot(xtest, path, color=colors[i]) + i = i + 1 + # plt.hist(np.array(ws), label = str(step)) + + plt.legend() + plt.show() diff --git a/stpy/test_functions/benchmarks.py b/stpy/test_functions/benchmarks.py index 9cd2c47..3b4907f 100755 --- a/stpy/test_functions/benchmarks.py +++ b/stpy/test_functions/benchmarks.py @@ -6,513 +6,566 @@ from stpy.continuous_processes.gauss_procc import GaussianProcess -class BenchmarkFunction(): - - def __init__(self, type="discrete", d=1, gamma=1.0, dts=None, **kwargs): - self.scale = 1.0 - self.type = type - self.gamma = gamma - self.d = d - self.dts = None - self.groups = None - - def eval_noiseless(self, X): - if X.size()[1] != self.d: - raise AssertionError("Invalid dimension for the Benchmark function ...") - pass - - def eval(self, X, sigma=None): - z = self.eval_noiseless(X) - if sigma is None: - y = z/self.scale + self.s * torch.randn(X.size()[0], 1, dtype=torch.float64) - else: - y = z/self.scale + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64) - return y - - def optimum(self): - return 1.0 - - def maximum(self, xtest=None): - if self.type == "discrete": - self.max = self.maximum_discrete(xtest) - else: - self.max = self.maximum_continuous() - return self.max - - def maximum_discrete(self, xtest): - maximum =torch.max(self.eval_noiseless(xtest)) - return maximum - - def maximum_continuous(self): - return 1.0 - - def scale_max(self, xtest=None): - self.scale = self.maximum(xtest=xtest) - print("Scaling with", self.scale) - - def optimize(self, xtest, sigma, restarts=5): - (n, d) = xtest.size() - ytest = self.eval(xtest, sigma=sigma) - kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(d, dtype=torch.float64) * 0.1, - groups=self.groups) - GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=sigma, d=d) - GP.fit(xtest, ytest) - GP.optimize_params(type="bandwidth", restarts=restarts) - print("Optimized") - # GP.visualize(xtest) - self.gamma = torch.min(kernel.gamma) - return self.gamma - - def return_params(self): - return (self.gamma, self.groups, self.d) - - def bandwidth(self): - return self.gamma - - def set_group_param(self, groups): - self.groups = groups - - def bounds(self): - b = tuple([(-0.5, 0.5) for i in range(self.d)]) - return b - - def initial_guess(self, N, adv_inv=False): - if adv_inv == False: - x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.d))) - else: - x = torch.from_numpy(np.random.uniform(-0.5, 0., size=(N, self.d))) - return x - - def interval(self, n, L_infinity_ball=0.5): - if n == None: - xtest = None - else: - xtest = torch.from_numpy(stpy.helpers.helper.interval(n, self.d, L_infinity_ball=L_infinity_ball)) - return xtest - - def visualize(self, xtest): - import matplotlib.pyplot as plt - d = xtest.size()[1] - if d == 1: - plt.figure(figsize=(15, 7)) - plt.clf() - 
plt.plot(xtest.numpy(), self.eval_noiseless(xtest)[:, 0].numpy()) - plt.show() - elif d == 2: - from scipy.interpolate import griddata - plt.figure(figsize=(15, 7)) - plt.clf() - ax = plt.axes(projection='3d') - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z = griddata((xx, yy), self.eval_noiseless(xtest)[:, 0].numpy(), (grid_x, grid_y), method='linear') - ax.plot_surface(grid_x, grid_y, grid_z, color='b', alpha=0.4) - plt.show() +class BenchmarkFunction: + + def __init__(self, type="discrete", d=1, gamma=1.0, dts=None, **kwargs): + self.scale = 1.0 + self.type = type + self.gamma = gamma + self.d = d + self.dts = None + self.groups = None + + def eval_noiseless(self, X): + if X.size()[1] != self.d: + raise AssertionError("Invalid dimension for the Benchmark function ...") + pass + + def eval(self, X, sigma=None): + z = self.eval_noiseless(X) + if sigma is None: + y = z / self.scale + self.s * torch.randn( + X.size()[0], 1, dtype=torch.float64 + ) + else: + y = z / self.scale + sigma * torch.randn( + X.size()[0], 1, dtype=torch.float64 + ) + return y + + def optimum(self): + return 1.0 + + def maximum(self, xtest=None): + if self.type == "discrete": + self.max = self.maximum_discrete(xtest) + else: + self.max = self.maximum_continuous() + return self.max + + def maximum_discrete(self, xtest): + maximum = torch.max(self.eval_noiseless(xtest)) + return maximum + + def maximum_continuous(self): + return 1.0 + + def scale_max(self, xtest=None): + self.scale = self.maximum(xtest=xtest) + print("Scaling with", self.scale) + + def optimize(self, xtest, sigma, restarts=5): + (n, d) = xtest.size() + ytest = self.eval(xtest, sigma=sigma) + kernel = stpy.kernels.KernelFunction( + kernel_name="ard", + gamma=torch.ones(d, dtype=torch.float64) * 0.1, + groups=self.groups, + ) + GP = stpy.continuous_processes.gauss_procc.GaussianProcess( + kernel_custom=kernel, s=sigma, d=d + ) + GP.fit(xtest, ytest) + GP.optimize_params(type="bandwidth", restarts=restarts) + print("Optimized") + # GP.visualize(xtest) + self.gamma = torch.min(kernel.gamma) + return self.gamma + + def return_params(self): + return (self.gamma, self.groups, self.d) + + def bandwidth(self): + return self.gamma + + def set_group_param(self, groups): + self.groups = groups + + def bounds(self): + b = tuple([(-0.5, 0.5) for i in range(self.d)]) + return b + + def initial_guess(self, N, adv_inv=False): + if adv_inv == False: + x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.d))) + else: + x = torch.from_numpy(np.random.uniform(-0.5, 0.0, size=(N, self.d))) + return x + + def interval(self, n, L_infinity_ball=0.5): + if n == None: + xtest = None + else: + xtest = torch.from_numpy( + stpy.helpers.helper.interval(n, self.d, L_infinity_ball=L_infinity_ball) + ) + return xtest + + def visualize(self, xtest): + import matplotlib.pyplot as plt + + d = xtest.size()[1] + if d == 1: + plt.figure(figsize=(15, 7)) + plt.clf() + plt.plot(xtest.numpy(), self.eval_noiseless(xtest)[:, 0].numpy()) + plt.show() + elif d == 2: + from scipy.interpolate import griddata + + plt.figure(figsize=(15, 7)) + plt.clf() + ax = plt.axes(projection="3d") + xx = xtest[:, 0].numpy() + yy = xtest[:, 1].numpy() + grid_x, grid_y = np.mgrid[ + min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j + ] + grid_z = griddata( + (xx, yy), + self.eval_noiseless(xtest)[:, 0].numpy(), + (grid_x, grid_y), + method="linear", + ) + ax.plot_surface(grid_x, grid_y, grid_z, color="b", alpha=0.4) + 
plt.show() class CamelbackBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = 2 - - def eval_noiseless(self, X): - super().eval_noiseless(X) - xx = X[:, 0] * 4 - yy = X[:, 1] * 2 - y = (4. - 2.1 * xx ** 2 + (xx ** 4) / 3.) * (xx ** 2) + xx * yy + (-4. + 4 * (yy ** 2)) * (yy ** 2) - y = -y.view(X.size()[0], 1) - # y = np.tanh(y) - y = y / 5. - return y / self.scale + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = 2 + + def eval_noiseless(self, X): + super().eval_noiseless(X) + xx = X[:, 0] * 4 + yy = X[:, 1] * 2 + y = ( + (4.0 - 2.1 * xx**2 + (xx**4) / 3.0) * (xx**2) + + xx * yy + + (-4.0 + 4 * (yy**2)) * (yy**2) + ) + y = -y.view(X.size()[0], 1) + # y = np.tanh(y) + y = y / 5.0 + return y / self.scale # def optimize(self,xtest,sigma, restarts = 5): # self.gamma = 0.3 + # self.gamma = 0.3 class QuadraticBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - self.type = "continuous" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + self.type = "continuous" - if 'R' in kwargs: - self.R = kwargs['R'] - print("Quadratic Problem: Rotating - no longer additive.") - print(self.R) - else: - self.R = torch.eye(self.d, self.d, dtype=torch.float64) - print("Quadratic Problem: Additive.") + if "R" in kwargs: + self.R = kwargs["R"] + print("Quadratic Problem: Rotating - no longer additive.") + print(self.R) + else: + self.R = torch.eye(self.d, self.d, dtype=torch.float64) + print("Quadratic Problem: Additive.") - def eval_noiseless(self, X): - D = torch.diag(torch.Tensor([1., 2.]).double()) - super().eval_noiseless(X) - (n, d) = X.size() - X = X @ self.R - sum_ = torch.sum((X @ D) ** 2, dim=1) - print(sum_.size()) - return -sum_.view(-1, 1) / self.scale + 1 + def eval_noiseless(self, X): + D = torch.diag(torch.Tensor([1.0, 2.0]).double()) + super().eval_noiseless(X) + (n, d) = X.size() + X = X @ self.R + sum_ = torch.sum((X @ D) ** 2, dim=1) + print(sum_.size()) + return -sum_.view(-1, 1) / self.scale + 1 - def bandwidth(self): - return 0.2 + def bandwidth(self): + return 0.2 class PolynomialBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - self.type = "continuous" - - if 'R' in kwargs: - self.R = kwargs['R'] - print("Quadratic Problem: Rotating - no longer additive.") - print(self.R) - else: - self.R = torch.eye(self.d, self.d, dtype=torch.float64) - print("Quadratic Problem: Additive.") - - def eval_noiseless(self, X): - D = torch.diag(torch.Tensor([1., 2.]).double()) - super().eval_noiseless(X) - (n, d) = X.size() - X = X @ self.R - sum_ = torch.sum((X @ D) ** 2, dim=1) + torch.sum((X @ D) ** 3, dim=1) * 0.5 + torch.sum((X @ D) ** 4, dim=1) - print(sum_.size()) - return -sum_.view(-1, 1) / self.scale + 1 - - def bandwidth(self): - return 0.2 + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + self.type = "continuous" + + if "R" in kwargs: + self.R = kwargs["R"] + print("Quadratic Problem: Rotating - no longer additive.") + print(self.R) + else: + self.R = torch.eye(self.d, self.d, dtype=torch.float64) + print("Quadratic Problem: Additive.") + + def eval_noiseless(self, X): + D = torch.diag(torch.Tensor([1.0, 2.0]).double()) + super().eval_noiseless(X) + (n, d) = X.size() + X = X @ self.R + sum_ = ( + torch.sum((X @ D) ** 2, dim=1) + + torch.sum((X @ D) ** 3, dim=1) * 0.5 + + torch.sum((X @ D) ** 4, dim=1) + ) + print(sum_.size()) + 
return -sum_.view(-1, 1) / self.scale + 1 + + def bandwidth(self): + return 0.2 class MichalBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - self.type = "continuous" - - if 'R' in kwargs: - self.R = kwargs['R'] - print("Michal Problem: Rotating - no longer additive.") - print(self.R) - else: - self.R = torch.eye(self.d, self.d, dtype=torch.float64) - print("Michal Problem: Additive.") - - def eval_noiseless(self, X): - super().eval_noiseless(X) - (n, d) = X.size() - X = X @ self.R - X = X / 0.75 - X = (X + 0.5) * np.pi - ar = torch.from_numpy(np.arange(1, d + 1, 1, dtype=np.float64)) - sum_ = torch.sin(X) * torch.pow(torch.sin(ar * X / np.pi), int(2 * d)) - sum_ = torch.sum(sum_, dim=1).view(-1, 1) - return sum_ / self.scale - - def optimize(self, xtest, sigma, restarts=5, n=512): - xtest = torch.zeros(n, self.d, dtype=torch.float64) - xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) - ytest = self.eval(xtest, sigma=sigma) - kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, - groups=self.groups) - GP = GaussianProcess(kernel=kernel, s=sigma, d=self.d) - GP.fit_gp(xtest, ytest) - #GP.optimize_params(type="bandwidth", restarts=restarts) - #print("Optimized") - #GP.back_prop - self.gamma = torch.min(kernel.gamma) - return self.gamma - - def bandwidth(self): - return 0.2 - - def maximum_continuous(self): - opt = np.ones(shape=(20)) - # holds with different constnat - opt[0] = 2.93254 - opt[1] = 2.34661 - opt[2] = 1.64107 - opt[3] = 1.24415 - opt[4] = 0.999643 - opt[5] = 0.834879 - opt[6] = 2.1089 - opt[7] = 1.84835 - opt[8] = 1.64448 - opt[9] = 1.48089 - opt[10] = 1.34678 - opt[11] = 1.2349 - opt[12] = 1.89701 - opt[13] = 1.76194 - opt[14] = 1.64477 - opt[15] = 1.54218 - opt[16] = 1.45162 - opt[17] = 1.37109 - opt[18] = 1.81774 - return float(opt[self.d]) + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + self.type = "continuous" + + if "R" in kwargs: + self.R = kwargs["R"] + print("Michal Problem: Rotating - no longer additive.") + print(self.R) + else: + self.R = torch.eye(self.d, self.d, dtype=torch.float64) + print("Michal Problem: Additive.") + + def eval_noiseless(self, X): + super().eval_noiseless(X) + (n, d) = X.size() + X = X @ self.R + X = X / 0.75 + X = (X + 0.5) * np.pi + ar = torch.from_numpy(np.arange(1, d + 1, 1, dtype=np.float64)) + sum_ = torch.sin(X) * torch.pow(torch.sin(ar * X / np.pi), int(2 * d)) + sum_ = torch.sum(sum_, dim=1).view(-1, 1) + return sum_ / self.scale + + def optimize(self, xtest, sigma, restarts=5, n=512): + xtest = torch.zeros(n, self.d, dtype=torch.float64) + xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) + ytest = self.eval(xtest, sigma=sigma) + kernel = stpy.kernels.KernelFunction( + kernel_name="ard", + gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, + groups=self.groups, + ) + GP = GaussianProcess(kernel=kernel, s=sigma, d=self.d) + GP.fit_gp(xtest, ytest) + # GP.optimize_params(type="bandwidth", restarts=restarts) + # print("Optimized") + # GP.back_prop + self.gamma = torch.min(kernel.gamma) + return self.gamma + + def bandwidth(self): + return 0.2 + + def maximum_continuous(self): + opt = np.ones(shape=(20)) + # holds with different constnat + opt[0] = 2.93254 + opt[1] = 2.34661 + opt[2] = 1.64107 + opt[3] = 1.24415 + opt[4] = 0.999643 + opt[5] = 0.834879 + opt[6] = 2.1089 + opt[7] = 1.84835 + opt[8] = 1.64448 + opt[9] = 1.48089 + opt[10] = 1.34678 + 
opt[11] = 1.2349 + opt[12] = 1.89701 + opt[13] = 1.76194 + opt[14] = 1.64477 + opt[15] = 1.54218 + opt[16] = 1.45162 + opt[17] = 1.37109 + opt[18] = 1.81774 + return float(opt[self.d]) class StybTangBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - self.type = "discrete" - if 'R' in kwargs: - self.R = kwargs['R'] - print("Stybtang Problem: Rotating - no longer additive.") - print(self.R) - else: - self.R = torch.eye(self.d, self.d, dtype=torch.float64) - print("Stybtang Problem: Additive") - - def eval_noiseless(self, X): - super().eval_noiseless(X) - (n, d) = X.size() - X = X @ self.R - X = X * 8 - Y = X ** 2 - sum_ = torch.sum(Y ** 2 - 16. * Y + 5 * X, dim=1).view(-1, 1) - return -(0.5 * sum_ / (d * 200.) + 0.5)/self.scale - - # def maximum_continuous(self): - # opt = np.ones(shape=(self.d)) * (-2.9035) / 8 - # opt = torch.from_numpy(opt.reshape(1, -1)) - # value = self.eval_noiseless(opt)[0][0] * 16 - # return value - # - # def optimize(self, xtest, sigma, restarts=5, n=512): - # xtest = torch.zeros(n, self.d, dtype=torch.float64) - # xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) - # ytest = self.eval(xtest, sigma=sigma) - # kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, - # groups=self.groups) - # GP = GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d) - # GP.fit(xtest, ytest) - # GP.optimize_params(type="bandwidth", restarts=restarts) - # print("Optimized") - # self.gamma = torch.min(kernel.gamma) - # return self.gamma + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + self.type = "discrete" + if "R" in kwargs: + self.R = kwargs["R"] + print("Stybtang Problem: Rotating - no longer additive.") + print(self.R) + else: + self.R = torch.eye(self.d, self.d, dtype=torch.float64) + print("Stybtang Problem: Additive") + + def eval_noiseless(self, X): + super().eval_noiseless(X) + (n, d) = X.size() + X = X @ self.R + X = X * 8 + Y = X**2 + sum_ = torch.sum(Y**2 - 16.0 * Y + 5 * X, dim=1).view(-1, 1) + return -(0.5 * sum_ / (d * 200.0) + 0.5) / self.scale + + # def maximum_continuous(self): + # opt = np.ones(shape=(self.d)) * (-2.9035) / 8 + # opt = torch.from_numpy(opt.reshape(1, -1)) + # value = self.eval_noiseless(opt)[0][0] * 16 + # return value + # + # def optimize(self, xtest, sigma, restarts=5, n=512): + # xtest = torch.zeros(n, self.d, dtype=torch.float64) + # xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) + # ytest = self.eval(xtest, sigma=sigma) + # kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, + # groups=self.groups) + # GP = GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d) + # GP.fit(xtest, ytest) + # GP.optimize_params(type="bandwidth", restarts=restarts) + # print("Optimized") + # self.gamma = torch.min(kernel.gamma) + # return self.gamma + class GeneralizedAdditiveOverlap(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - self.type = "continuous" - - def eval_noiseless(self, X): - super().eval_noiseless(X) - (n, d) = X.size() - sum_ = torch.sum(torch.exp(-(torch.from_numpy(np.diff(X.numpy(), axis=1) / 0.25)) ** 2), dim=1).view(-1, 1) - return 0.5 * sum_ / self.scale - - def maximum_continuous(self): - opt = torch.from_numpy(np.zeros(shape=(1, self.d))) - value = self.eval_noiseless(opt)[0][0] - return value - - def optimize(self, xtest, sigma, restarts=5, 
n=512): - xtest = torch.zeros(n, self.d, dtype=torch.float64) - xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) - ytest = self.eval(xtest, sigma=sigma) - kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, - groups=self.groups) - GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=sigma, d=self.d) - GP.fit(xtest, ytest) - GP.optimize_params(type="bandwidth", restarts=restarts) - print("Optimized") - # self.gamma = torch.min(kernel.gamma) - # self.gamma = torch.zeros(1,1,dtype = torch.DoubleTensor) - # self.gamma[0,0] =0.35 - self.gamma = torch.Tensor([0.35]).double() - return self.gamma + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + self.type = "continuous" + + def eval_noiseless(self, X): + super().eval_noiseless(X) + (n, d) = X.size() + sum_ = torch.sum( + torch.exp(-((torch.from_numpy(np.diff(X.numpy(), axis=1) / 0.25)) ** 2)), + dim=1, + ).view(-1, 1) + return 0.5 * sum_ / self.scale + + def maximum_continuous(self): + opt = torch.from_numpy(np.zeros(shape=(1, self.d))) + value = self.eval_noiseless(opt)[0][0] + return value + + def optimize(self, xtest, sigma, restarts=5, n=512): + xtest = torch.zeros(n, self.d, dtype=torch.float64) + xtest[:, 0] = torch.linspace(-0.5, 0.5, n, dtype=torch.float64) + ytest = self.eval(xtest, sigma=sigma) + kernel = stpy.kernels.KernelFunction( + kernel_name="ard", + gamma=torch.ones(self.d, dtype=torch.float64) * 0.1, + groups=self.groups, + ) + GP = stpy.continuous_processes.gauss_procc.GaussianProcess( + kernel_custom=kernel, s=sigma, d=self.d + ) + GP.fit(xtest, ytest) + GP.optimize_params(type="bandwidth", restarts=restarts) + print("Optimized") + # self.gamma = torch.min(kernel.gamma) + # self.gamma = torch.zeros(1,1,dtype = torch.DoubleTensor) + # self.gamma[0,0] =0.35 + self.gamma = torch.Tensor([0.35]).double() + return self.gamma class SwissFEL(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.d = kwargs['d'] - name = kwargs['dts'] - self.Simulator = FelSimulator(self.d, 0.0, "quadrupoles_2d") - self.Simulator.load_fresh(name, dts='0') - #self.groups = stpy.helpers.helper.full_group(self.d) - GP = GaussianProcess(kernel_name="ard", d = self.d) - self.Simulator.fit_simulator(GP, optimize="bandwidth", restarts=2) - self.type = "continuous" - self.s = self.Simulator.s - - def eval_noiseless(self, X): - super().eval_noiseless(X) - y = self.Simulator.eval(X, sigma=0) - return y - - def maximum(self, xtest=None): - return torch.max(self.Simulator.eval(xtest,sigma = 0)) + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.d = kwargs["d"] + name = kwargs["dts"] + self.Simulator = FelSimulator(self.d, 0.0, "quadrupoles_2d") + self.Simulator.load_fresh(name, dts="0") + # self.groups = stpy.helpers.helper.full_group(self.d) + GP = GaussianProcess(kernel_name="ard", d=self.d) + self.Simulator.fit_simulator(GP, optimize="bandwidth", restarts=2) + self.type = "continuous" + self.s = self.Simulator.s + + def eval_noiseless(self, X): + super().eval_noiseless(X) + y = self.Simulator.eval(X, sigma=0) + return y + + def maximum(self, xtest=None): + return torch.max(self.Simulator.eval(xtest, sigma=0)) class CustomBenchmark(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__(**kwargs) - if 'func' in kwargs: - self.eval_f = kwargs['func'] - else: - self.eval_f = lambda x: x[:, 0].view(-1, 1) * 0 - if 'likelihood' in kwargs: - self.likelihood = 
kwargs['likelihood'] - else: - self.likelihood = None - - def set_eval(self, f, scale=1.): - self.eval_f = f - self.scale = scale - - def eval_noiseless(self, X): - #super().eval_noiseless(X) - y = self.eval_f(X) - return y / self.scale - - def eval(self, X): - if self.likelihood is not None: - return self.eval_noiseless(X)+self.likelihood.sample_noise(X) - else: - return self.eval_noiseless(X) + def __init__(self, **kwargs): + super().__init__(**kwargs) + if "func" in kwargs: + self.eval_f = kwargs["func"] + else: + self.eval_f = lambda x: x[:, 0].view(-1, 1) * 0 + if "likelihood" in kwargs: + self.likelihood = kwargs["likelihood"] + else: + self.likelihood = None + + def set_eval(self, f, scale=1.0): + self.eval_f = f + self.scale = scale + + def eval_noiseless(self, X): + # super().eval_noiseless(X) + y = self.eval_f(X) + return y / self.scale + + def eval(self, X): + if self.likelihood is not None: + return self.eval_noiseless(X) + self.likelihood.sample_noise(X) + else: + return self.eval_noiseless(X) + class GaussianProcessSample(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__() - self.d = kwargs['d'] - self.kernel_name = kwargs['name'] - self.gamma = kwargs['gamma'] - self.sigma = kwargs['sigma'] - self.n = kwargs['n'] - self.sample(self.n) - - def sample(self, n): - self.xtest = self.interval(n) - GP = stpy.continuous_processes.gauss_procc.GaussianProcess(s=self.sigma, gamma=self.gamma, - kernel=self.kernel_name) - self.sample = GP.sample(self.xtest).numpy() - - def isin(self, element, test_elements, assume_unique=False): - (n, d) = element.shape - (m, d) = test_elements.shape - maskFull = np.full((n), False, dtype=bool) - for j in range(m): - mask = np.full((n), True, dtype=bool) - for i in range(d): - mask = np.logical_and(mask, np.in1d(element[:, i], test_elements[j, i], assume_unique=assume_unique)) - # mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-02)) - # print (j, i, mask) - maskFull = np.logical_or(mask, maskFull) - # print (maskFull) - return maskFull - - def eval_noiseless(self, X): - super().eval_noiseless(X) - mask = self.isin(self.xtest.numpy(), X.numpy()) - y = torch.from_numpy(self.sample[mask, :]).view(-1, 1) - return y / self.scale - - def initial_guess(self, N, adv_inv=False): - x = self.xtest[np.random.permutation(np.arange(0, self.xtest.size()[0], 1))[0:N], :] - x = torch.sort(x, dim=0)[0] - return x - - def scale_max(self, xtest=None): - pass - - def optimize(self, xtest, sigma, restarts=5): - pass + def __init__(self, **kwargs): + super().__init__() + self.d = kwargs["d"] + self.kernel_name = kwargs["name"] + self.gamma = kwargs["gamma"] + self.sigma = kwargs["sigma"] + self.n = kwargs["n"] + self.sample(self.n) + + def sample(self, n): + self.xtest = self.interval(n) + GP = stpy.continuous_processes.gauss_procc.GaussianProcess( + s=self.sigma, gamma=self.gamma, kernel=self.kernel_name + ) + self.sample = GP.sample(self.xtest).numpy() + + def isin(self, element, test_elements, assume_unique=False): + (n, d) = element.shape + (m, d) = test_elements.shape + maskFull = np.full((n), False, dtype=bool) + for j in range(m): + mask = np.full((n), True, dtype=bool) + for i in range(d): + mask = np.logical_and( + mask, + np.in1d( + element[:, i], test_elements[j, i], assume_unique=assume_unique + ), + ) + # mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-02)) + # print (j, i, mask) + maskFull = np.logical_or(mask, maskFull) + # print (maskFull) + return maskFull + + def 
eval_noiseless(self, X): + super().eval_noiseless(X) + mask = self.isin(self.xtest.numpy(), X.numpy()) + y = torch.from_numpy(self.sample[mask, :]).view(-1, 1) + return y / self.scale + + def initial_guess(self, N, adv_inv=False): + x = self.xtest[ + np.random.permutation(np.arange(0, self.xtest.size()[0], 1))[0:N], : + ] + x = torch.sort(x, dim=0)[0] + return x + + def scale_max(self, xtest=None): + pass + + def optimize(self, xtest, sigma, restarts=5): + pass class KernelizedSample(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__() - self.d = kwargs['d'] - # self.kernel_name = kwargs['name'] - # self.gamma = kwargs['gamma'] - self.sigma = kwargs['sigma'] - # self.n = kwargs['n'] - self.embed = kwargs['embed'] - self.m = kwargs['m'] - self.sample() + def __init__(self, **kwargs): + super().__init__() + self.d = kwargs["d"] + # self.kernel_name = kwargs['name'] + # self.gamma = kwargs['gamma'] + self.sigma = kwargs["sigma"] + # self.n = kwargs['n'] + self.embed = kwargs["embed"] + self.m = kwargs["m"] + self.sample() - def set_theta(self, theta): - self.theta = theta + def set_theta(self, theta): + self.theta = theta - def set_cutoff(self, cutoff): - self.theta[cutoff:, 0] = 0 + def set_cutoff(self, cutoff): + self.theta[cutoff:, 0] = 0 - def sample(self): - print("basis size:", self.m) - GP = stpy.continuous_processes.kernelized_features.KernelizedFeatures(d=self.d, m=self.m, embeding=self.embed) - self.theta = GP.sample_theta(size=1) - print(self.theta) + def sample(self): + print("basis size:", self.m) + GP = stpy.continuous_processes.kernelized_features.KernelizedFeatures( + d=self.d, m=self.m, embeding=self.embed + ) + self.theta = GP.sample_theta(size=1) + print(self.theta) - def eval_noiseless(self, X): - super().eval_noiseless(X) - y = torch.mm(self.embed(X), self.theta) - return y / self.scale + def eval_noiseless(self, X): + super().eval_noiseless(X) + y = torch.mm(self.embed(X), self.theta) + return y / self.scale - def scale_max(self, xtest=None): - pass + def scale_max(self, xtest=None): + pass - def optimize(self, xtest, sigma, restarts=5): - pass + def optimize(self, xtest, sigma, restarts=5): + pass class Simple1DFunction(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__() - self.d = kwargs['d'] + def __init__(self, **kwargs): + super().__init__() + self.d = kwargs["d"] + + def eval_noiseless(self, X): + super().eval_noiseless(X) + z = (X + 0.5) * 1.2 + y = -(1.4 - 3 * z) * torch.sin(18 * z) + return y - def eval_noiseless(self, X): - super().eval_noiseless(X) - z = (X+0.5)*1.2 - y = -(1.4-3*z)*torch.sin(18*z) - return y + def maximum(self, xtest): + return torch.max(torch.abs(self.eval_noiseless(xtest))) - def maximum(self, xtest): - return torch.max(torch.abs(self.eval_noiseless(xtest))) class MultiRKHS(BenchmarkFunction): - def __init__(self, **kwargs): - super().__init__() - self.d = 1 + def __init__(self, **kwargs): + super().__init__() + self.d = 1 - def eval_noiseless(self, X): - y = 10 * X ** 2 # + 0.1*torch.sin(10*X) #+ torch.sum(torch.exp(-(X-Xi)**2)*Wi) - return y + def eval_noiseless(self, X): + y = 10 * X**2 # + 0.1*torch.sin(10*X) #+ torch.sum(torch.exp(-(X-Xi)**2)*Wi) + return y - def maximum(self, xtest=None): - pass + def maximum(self, xtest=None): + pass class LinearBenchmark(BenchmarkFunction): - def __init__(self, d, s): - self.d = d - self.s = s - # sample a plane - self.theta = torch.randn(d, 1, dtype=torch.float64) - - def eval_noiseless(self, X): - y = torch.mm(X, self.theta) - return y - - def eval(self, X, 
sigma=None): - if sigma is None: - sigma = self.s - z = self.eval_noiseless(X) - y = z + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64) - return y + def __init__(self, d, s): + self.d = d + self.s = s + # sample a plane + self.theta = torch.randn(d, 1, dtype=torch.float64) + + def eval_noiseless(self, X): + y = torch.mm(X, self.theta) + return y + + def eval(self, X, sigma=None): + if sigma is None: + sigma = self.s + z = self.eval_noiseless(X) + y = z + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64) + return y diff --git a/stpy/test_functions/neural_net.py b/stpy/test_functions/neural_net.py index 82807cd..82d770d 100755 --- a/stpy/test_functions/neural_net.py +++ b/stpy/test_functions/neural_net.py @@ -4,206 +4,232 @@ def matlab_style_gauss2D(shape=(3, 3), sigma=0.5): - m, n = [(ss - 1.) / 2. for ss in shape] - y, x = np.ogrid[-m:m + 1, -n:n + 1] - h = np.exp(-(x * x + y * y) / (2. * sigma * sigma)) - h[h < np.finfo(h.dtype).eps * h.max()] = 0 - sumh = h.sum() - if sumh != 0: - h /= sumh - return h + m, n = [(ss - 1.0) / 2.0 for ss in shape] + y, x = np.ogrid[-m : m + 1, -n : n + 1] + h = np.exp(-(x * x + y * y) / (2.0 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + sumh = h.sum() + if sumh != 0: + h /= sumh + return h def gaussian_filters(shape, sigmas): - (height, width, enter, leave) = shape - G = np.zeros(shape=shape) - for q in range(enter): - for index, val in enumerate(sigmas): - G[:, :, q, index] = matlab_style_gauss2D(shape=(height, width), sigma=val) - return G + (height, width, enter, leave) = shape + G = np.zeros(shape=shape) + for q in range(enter): + for index, val in enumerate(sigmas): + G[:, :, q, index] = matlab_style_gauss2D(shape=(height, width), sigma=val) + return G def gaussian_filters_tf(shape, sigmas): - G = gaussian_filters(shape, sigmas) - return tf.cast(tf.Variable(G), tf.float32) + G = gaussian_filters(shape, sigmas) + return tf.cast(tf.Variable(G), tf.float32) def deepnn(x, initialization_params, no_filters_1=32, no_filters_2=64): - """deepnn builds the graph for a deep net for classifying digits. - Args: - x: an input tensor with the dimensions (N_examples, 784), where 784 is the - number of pixels in a standard MNIST image. - Returns: - A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values - equal to the logits of classifying the digit into one of 10 classes (the - digits 0-9). keep_prob is a scalar placeholder for the probability of - dropout. - """ - - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images are - # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope('reshape'): - x_image = tf.reshape(x, [-1, 28, 28, 1]) - - # First convolutional layer - maps one grayscale image to 32 feature maps. - with tf.name_scope('conv1'): - # W_conv1 = weight_variable([5, 5, 1, no_filters_1]) - W_conv1 = gaussian_filters_tf([5, 5, 1, no_filters_1], initialization_params[0:no_filters_1]) - b_conv1 = bias_variable([no_filters_1]) - h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) - - # Pooling layer - downsamples by 2X. - with tf.name_scope('pool1'): - h_pool1 = max_pool_2x2(h_conv1) - - # Second convolutional layer -- maps 32 feature maps to 64. 
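    # A shape sketch for this second stage, assuming the 'SAME'-padded conv2d and
    # 2x2/stride-2 max_pool_2x2 helpers defined further down in this file: pool1
    # leaves 14x14 maps with no_filters_1 channels, conv2 turns them into 14x14
    # maps with no_filters_2 channels, and pool2 halves them again to 7x7
    # (28 -> 14 -> 7). That is where the 7 * 7 * no_filters_2 input size of the
    # fully connected layer W_fc1 below comes from (7 * 7 * 64 = 3136 by default).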
- with tf.name_scope('conv2'): - # W_conv2 = weight_variable([5, 5, no_filters_1, no_filters_2]) - W_conv2 = gaussian_filters_tf([5, 5, no_filters_1, no_filters_2], initialization_params[no_filters_1:]) - b_conv2 = bias_variable([no_filters_2]) - h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) - - # Second pooling layer. - with tf.name_scope('pool2'): - h_pool2 = max_pool_2x2(h_conv2) - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - with tf.name_scope('fc1'): - W_fc1 = weight_variable([7 * 7 * no_filters_2, 1024]) - b_fc1 = bias_variable([1024]) - - h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * no_filters_2]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of - # features. - with tf.name_scope('dropout'): - keep_prob = tf.placeholder(tf.float32) - h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope('fc2'): - W_fc2 = weight_variable([1024, 10]) - b_fc2 = bias_variable([10]) - - y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 - return y_conv, keep_prob + """deepnn builds the graph for a deep net for classifying digits. + Args: + x: an input tensor with the dimensions (N_examples, 784), where 784 is the + number of pixels in a standard MNIST image. + Returns: + A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values + equal to the logits of classifying the digit into one of 10 classes (the + digits 0-9). keep_prob is a scalar placeholder for the probability of + dropout. + """ + + # Reshape to use within a convolutional neural net. + # Last dimension is for "features" - there is only one here, since images are + # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. + with tf.name_scope("reshape"): + x_image = tf.reshape(x, [-1, 28, 28, 1]) + + # First convolutional layer - maps one grayscale image to 32 feature maps. + with tf.name_scope("conv1"): + # W_conv1 = weight_variable([5, 5, 1, no_filters_1]) + W_conv1 = gaussian_filters_tf( + [5, 5, 1, no_filters_1], initialization_params[0:no_filters_1] + ) + b_conv1 = bias_variable([no_filters_1]) + h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) + + # Pooling layer - downsamples by 2X. + with tf.name_scope("pool1"): + h_pool1 = max_pool_2x2(h_conv1) + + # Second convolutional layer -- maps 32 feature maps to 64. + with tf.name_scope("conv2"): + # W_conv2 = weight_variable([5, 5, no_filters_1, no_filters_2]) + W_conv2 = gaussian_filters_tf( + [5, 5, no_filters_1, no_filters_2], initialization_params[no_filters_1:] + ) + b_conv2 = bias_variable([no_filters_2]) + h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + + # Second pooling layer. + with tf.name_scope("pool2"): + h_pool2 = max_pool_2x2(h_conv2) + + # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image + # is down to 7x7x64 feature maps -- maps this to 1024 features. + with tf.name_scope("fc1"): + W_fc1 = weight_variable([7 * 7 * no_filters_2, 1024]) + b_fc1 = bias_variable([1024]) + + h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * no_filters_2]) + h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) + + # Dropout - controls the complexity of the model, prevents co-adaptation of + # features. 
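    # keep_prob is left as a placeholder so the same graph can be run with and
    # without dropout: train_network below feeds keep_prob=dropout (0.5 by
    # default) for the training step and keep_prob=1.0 whenever accuracy is
    # evaluated. Illustrative feeds (xs and ys are hypothetical batch arrays):
    #   sess.run(train_step, feed_dict={x: xs, y_: ys, keep_prob: 0.5})  # train
    #   acc = accuracy.eval(feed_dict={x: xs, y_: ys, keep_prob: 1.0})   # eval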
+ with tf.name_scope("dropout"): + keep_prob = tf.placeholder(tf.float32) + h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) + + # Map the 1024 features to 10 classes, one for each digit + with tf.name_scope("fc2"): + W_fc2 = weight_variable([1024, 10]) + b_fc2 = bias_variable([10]) + + y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 + return y_conv, keep_prob def conv2d(x, W): - """conv2d returns a 2d convolution layer with full stride.""" - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + """conv2d returns a 2d convolution layer with full stride.""" + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME") def max_pool_2x2(x): - """max_pool_2x2 downsamples a feature map by 2X.""" - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') + """max_pool_2x2 downsamples a feature map by 2X.""" + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) + """weight_variable generates a weight variable of a given shape.""" + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - - -def train_network(mnist, verbose=True, initialization_params=None, min_steps_val=10, - val_size=3000, dropout=0.5, learning_rate=10e-4, maxiter=500, val_count=1, batch_size=80, **kwargs): - # Import data - # Create the model - x = tf.placeholder(tf.float32, [None, 784]) - - # Define loss and optimizer - y_ = tf.placeholder(tf.float32, [None, 10]) - - # Build the graph for the deep net - y_conv, keep_prob = deepnn(x, initialization_params) - - with tf.name_scope('loss'): - cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv) - - cross_entropy = tf.reduce_mean(cross_entropy) - - with tf.name_scope('adam_optimizer'): - train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy) - - with tf.name_scope('accuracy'): - correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) - correct_prediction = tf.cast(correct_prediction, tf.float32) - accuracy = tf.reduce_mean(correct_prediction) - - # graph_location = tempfile.mkdtemp() - # print('Saving graph to: %s' % graph_location) - # train_writer = tf.summary.FileWriter(graph_location) - # train_writer.add_graph(tf.get_default_graph()) - - init = tf.initialize_all_variables() - - with tf.Session() as sess: - sess.run(init) - # sess.run(tf.global_variables_initializer()) - - oldval_scores = np.zeros((min_steps_val)) - j = 0 - for i in range(maxiter): - batch = mnist.train.next_batch(batch_size) - train_accuracy = accuracy.eval(feed_dict={ - x: batch[0], y_: batch[1], keep_prob: 1.0}) - if i % val_count == 0: - - val_accuracy = accuracy.eval(feed_dict={ - x: mnist.validation.images[0:val_size], - y_: mnist.validation.labels[0:val_size], keep_prob: 1.0}) - oldval_scores[j % min_steps_val] = val_accuracy - j = j + 1 - if verbose == True: - print('step %d, training accuracy: %f, validation accuracy: %f' % (i, train_accuracy, val_accuracy)) - ## validation stopping - if i > min_steps_val: - if np.mean(oldval_scores) > val_accuracy: - if verbose == True: - print("Validation stopping") - break - train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout}) - - test_accuracy = 
accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}) - if verbose == True: - print('test accuracy %g' % test_accuracy) - - return (i, test_accuracy) + """bias_variable generates a bias variable of a given shape.""" + initial = tf.constant(0.1, shape=shape) + return tf.Variable(initial) + + +def train_network( + mnist, + verbose=True, + initialization_params=None, + min_steps_val=10, + val_size=3000, + dropout=0.5, + learning_rate=10e-4, + maxiter=500, + val_count=1, + batch_size=80, + **kwargs +): + # Import data + # Create the model + x = tf.placeholder(tf.float32, [None, 784]) + + # Define loss and optimizer + y_ = tf.placeholder(tf.float32, [None, 10]) + + # Build the graph for the deep net + y_conv, keep_prob = deepnn(x, initialization_params) + + with tf.name_scope("loss"): + cross_entropy = tf.nn.softmax_cross_entropy_with_logits( + labels=y_, logits=y_conv + ) + + cross_entropy = tf.reduce_mean(cross_entropy) + + with tf.name_scope("adam_optimizer"): + train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy) + + with tf.name_scope("accuracy"): + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) + correct_prediction = tf.cast(correct_prediction, tf.float32) + accuracy = tf.reduce_mean(correct_prediction) + + # graph_location = tempfile.mkdtemp() + # print('Saving graph to: %s' % graph_location) + # train_writer = tf.summary.FileWriter(graph_location) + # train_writer.add_graph(tf.get_default_graph()) + + init = tf.initialize_all_variables() + + with tf.Session() as sess: + sess.run(init) + # sess.run(tf.global_variables_initializer()) + + oldval_scores = np.zeros((min_steps_val)) + j = 0 + for i in range(maxiter): + batch = mnist.train.next_batch(batch_size) + train_accuracy = accuracy.eval( + feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0} + ) + if i % val_count == 0: + + val_accuracy = accuracy.eval( + feed_dict={ + x: mnist.validation.images[0:val_size], + y_: mnist.validation.labels[0:val_size], + keep_prob: 1.0, + } + ) + oldval_scores[j % min_steps_val] = val_accuracy + j = j + 1 + if verbose == True: + print( + "step %d, training accuracy: %f, validation accuracy: %f" + % (i, train_accuracy, val_accuracy) + ) + ## validation stopping + if i > min_steps_val: + if np.mean(oldval_scores) > val_accuracy: + if verbose == True: + print("Validation stopping") + break + train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout}) + + test_accuracy = accuracy.eval( + feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0} + ) + if verbose == True: + print("test accuracy %g" % test_accuracy) + + return (i, test_accuracy) if __name__ == "__main__": - N = 16 - N2 = 32 - sigmas = np.random.randn(N + N2) - sigmas = sigmas ** 2 - # print (train_network("/tmp/tensorflow", dropout = 0.7, verbose = False, val_size = 1, initialization_params = sigmas, no_filters_1=N, no_filters_2=N2)) - - ############################################## - ######## Visualization of Filters ########### - ############################################## - import matplotlib as mpl - - V = gaussian_filters((5, 5, 1, N), sigmas[0:N]) - fig, axes = plt.subplots(nrows=4, ncols=int(N / 4)) - for index, ax in enumerate(axes.flat): - im = ax.imshow(V[:, :, 0, index], interpolation='nearest', vmin=0, vmax=1) - cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat]) - plt.colorbar(im, cax=cax, **kw) - - V2 = gaussian_filters((5, 5, N, N2), sigmas[N:]) - fig, axes = plt.subplots(nrows=8, ncols=int(N2 / 8)) - for index, ax 
in enumerate(axes.flat): - im = ax.imshow(V2[:, :, 0, index], interpolation='nearest', vmin=0, vmax=1) - cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat]) - plt.colorbar(im, cax=cax, **kw) - plt.show() + N = 16 + N2 = 32 + sigmas = np.random.randn(N + N2) + sigmas = sigmas**2 + # print (train_network("/tmp/tensorflow", dropout = 0.7, verbose = False, val_size = 1, initialization_params = sigmas, no_filters_1=N, no_filters_2=N2)) + + ############################################## + ######## Visualization of Filters ########### + ############################################## + import matplotlib as mpl + + V = gaussian_filters((5, 5, 1, N), sigmas[0:N]) + fig, axes = plt.subplots(nrows=4, ncols=int(N / 4)) + for index, ax in enumerate(axes.flat): + im = ax.imshow(V[:, :, 0, index], interpolation="nearest", vmin=0, vmax=1) + cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat]) + plt.colorbar(im, cax=cax, **kw) + + V2 = gaussian_filters((5, 5, N, N2), sigmas[N:]) + fig, axes = plt.subplots(nrows=8, ncols=int(N2 / 8)) + for index, ax in enumerate(axes.flat): + im = ax.imshow(V2[:, :, 0, index], interpolation="nearest", vmin=0, vmax=1) + cax, kw = mpl.colorbar.make_axes([ax for ax in axes.flat]) + plt.colorbar(im, cax=cax, **kw) + plt.show() diff --git a/stpy/test_functions/parallel_coordinates_plot.py b/stpy/test_functions/parallel_coordinates_plot.py index e9d9a15..836ed92 100755 --- a/stpy/test_functions/parallel_coordinates_plot.py +++ b/stpy/test_functions/parallel_coordinates_plot.py @@ -8,63 +8,65 @@ def parallel_coordinates_bo(X, Y, names=None, scaling=None, fig_size=(20, 10)): - """ - Parallel plot graph + """ + Parallel plot graph - X : 2D numpy array of parameters [points,parameters] - Y : 1D numpy array of values - names: list of names size of (parameters) - scaling: - "stat": statistical scaling - None : no scaling - (low,hig): tuple, scales to [-1,1] - fig_size: fig size in inches - """ + X : 2D numpy array of parameters [points,parameters] + Y : 1D numpy array of values + names: list of names size of (parameters) + scaling: + "stat": statistical scaling + None : no scaling + (low,hig): tuple, scales to [-1,1] + fig_size: fig size in inches + """ - if scaling == "stat": - scaler = StandardScaler(copy=True, with_mean=True, with_std=True) - scaler.fit(X) - Z = scaler.transform(X) - elif scaling is None: - Z = X - else: - try: - Z = X - up, low = scaling - d = X.shape[1] - for i in range(d): - Z[:, i] = (2 * X[:, i]) / (up[i] - low[i]) + (1.0 - 2 * up[i] / (up[i] - low[i])) - except: - pass + if scaling == "stat": + scaler = StandardScaler(copy=True, with_mean=True, with_std=True) + scaler.fit(X) + Z = scaler.transform(X) + elif scaling is None: + Z = X + else: + try: + Z = X + up, low = scaling + d = X.shape[1] + for i in range(d): + Z[:, i] = (2 * X[:, i]) / (up[i] - low[i]) + ( + 1.0 - 2 * up[i] / (up[i] - low[i]) + ) + except: + pass - D = np.append(Z, Y, axis=1) - data = pd.DataFrame(D) - data = data.sort_values(by=Z.shape[1]) - names = copy.copy(names) - names.append(Z.shape[1]) - if names is not None: - data.columns = names - plt.figure(figsize=(fig_size)) - plt.xticks(rotation=45) - ax = parallel_coordinates(data, Z.shape[1], colormap="summer") - ax.get_legend().remove() - plt.show() + D = np.append(Z, Y, axis=1) + data = pd.DataFrame(D) + data = data.sort_values(by=Z.shape[1]) + names = copy.copy(names) + names.append(Z.shape[1]) + if names is not None: + data.columns = names + plt.figure(figsize=(fig_size)) + plt.xticks(rotation=45) + ax = 
parallel_coordinates(data, Z.shape[1], colormap="summer") + ax.get_legend().remove() + plt.show() if __name__ == "__main__": - from stpy.test_functions.protein_benchmark import ProteinBenchmark + from stpy.test_functions.protein_benchmark import ProteinBenchmark - Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=3, ref=['A', 'B', 'C', 'D']) - names = Benchmark.data['P1'].values - Benchmark.self_translate() - vals = Benchmark.data['P1'].values + Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=3, ref=["A", "B", "C", "D"]) + names = Benchmark.data["P1"].values + Benchmark.self_translate() + vals = Benchmark.data["P1"].values - print(Benchmark.data) - X = Benchmark.data.values[0:8000, 0:3] - Y = Benchmark.data.values[0:8000, 5].reshape(-1, 1) - print(X.shape, Y.shape) - names = ["P1", "P2", "P3"] - # plt.yticks(vals, names) - parallel_coordinates_bo(X, Y, names=names) + print(Benchmark.data) + X = Benchmark.data.values[0:8000, 0:3] + Y = Benchmark.data.values[0:8000, 5].reshape(-1, 1) + print(X.shape, Y.shape) + names = ["P1", "P2", "P3"] + # plt.yticks(vals, names) + parallel_coordinates_bo(X, Y, names=names) - plt.show() + plt.show() diff --git a/stpy/test_functions/protein_benchmark.py b/stpy/test_functions/protein_benchmark.py index 1dc4189..008bfe4 100755 --- a/stpy/test_functions/protein_benchmark.py +++ b/stpy/test_functions/protein_benchmark.py @@ -3,407 +3,489 @@ import pandas as pd import torch -#import stpy.helpers.helper as helper -#from stpy.test_functions.benchmarks import BenchmarkFunction - - -class ProteinOperator(): - - def __init__(self): - - self.real_names = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', 'Q': 'Gln', 'E': 'Glu', - 'G': 'Gly', - 'H': 'His', 'I': 'Iso', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', - 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val', 'B': 'Asx'} - - self.dictionary = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7, - 'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13, - 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19, 'B': 3} - - self.inv_dictionary = {v: k for k, v in self.dictionary.items()} - - self.inv_real_names = {v: k for k, v in self.real_names.items()} - - self.Negative = ['D', 'E'] - self.Positive = ['R', 'K', 'H'] - self.Aromatic = ['F', 'W', 'Y', 'H'] - self.Polar = ['N', 'Q', 'S', 'T', 'Y'] - self.Aliphatic = ['A', 'G', 'I', 'L', 'V'] - self.Amide = ['N', 'Q'] - self.Sulfur = ['C', 'M'] - self.Hydroxil = ['S', 'T'] - self.Small = ['A', 'S', 'T', 'P', 'G', 'V'] - self.Medium = ['M', 'L', 'I', 'C', 'N', 'Q', 'K', 'D', 'E'] - self.Large = ['R', 'H', 'W', 'F', 'Y'] - self.Hydro = ['M', 'L', 'I', 'V', 'A'] - self.Cyclic = ['P'] - self.Random = ['F', 'W', 'L', 'S', 'D'] - - def translate(self, X): - f = lambda x: self.dictionary[x] - Y = np.zeros(shape=X.shape).astype(int) - for i in range(X.shape[0]): - for j in range(X.shape[1]): - Y[i, j] = f(X[i, j]) - return Y - - def remove_wild_type_mutations(self, mutation): - mutation_split = mutation.split("+") - output = [] - for mut in mutation_split: - if mut[0] != mut[-1]: - output.append(mut) - return "+".join(output) - - def get_variant_code(self, mutation): - mutation_split = mutation.split("+") - return "".join([mut[-1] for mut in mutation_split]) - - def get_substitutes_from_mutation(self, mutation): - mutation_split = mutation.split("+") - original = [] - new = [] - positions = [] - - for mut in mutation_split: - original.append(mut[0]) - new.append(mut[-1]) - positions.append(int(mut[1:-1])) - - return 
(original, new, positions) - - def mutation(self, original_seq, positions, new_seq): - old_seq = list(original_seq) - new_seq = list(new_seq) - identifier = [] - for old, new, position in zip(old_seq, new_seq, positions): - if old != new: - identifier.append(old + str(position) + new) - return '+'.join(identifier) - - def interval_number(self, dim=None): - if dim is None: - dim = self.dim - arr = self.interval_letters(dim=dim) - out = self.translate(arr) - return out - - def interval_onehot(self, dim=None): - if dim is None: - dim = self.dim - arr = self.interval_letters(dim=dim) - out = self.translate_one_hot(arr) - return out - - def interval_letters(self, dim=None): - if dim is None: - dim = self.dim - - names = list(self.dictionary.keys()) - names.remove('B') - arr = [] - for i in range(dim): - arr.append(names) - out = helper.cartesian(arr) - return out - - def translate_amino_acid(self, letter): - return self.dictionary[letter] - - def translate_mutation_series(self, series): - f = lambda x: np.array(list(map(int, [self.dictionary[a] for a in list(str(x))]))).reshape(-1, 1) - xtest = np.concatenate(series.apply(f).values, axis=1).T - return xtest - - def translate_one_hot(self, X): - try: - Y = self.translate(X) - except: - Y = X - n, d = list(X.shape) - Z = np.zeros(shape=(n, d * self.total)) - for i in range(n): - for j in range(d): - Z[i, Y[i, j] + j * self.total] = 1.0 - - return Z - - def get_real_name(self, name): - out = [] - for i in name: - out.append(self.real_names[i]) - return out - - -class ProteinBenchmark(): - - def __init__(self, fname, dim=1, ref=['D', 'D', 'D', 'D'], avg=False, scale=True): - """ - initialize the protein benchmark - - fname : dataset name - dim : dimension of the dataset - ref : for smaller dimensions what is the reference in the 4 dim space? 
- avg : average the effect over other combinations in lower dimensions - """ - - """ +# import stpy.helpers.helper as helper +# from stpy.test_functions.benchmarks import BenchmarkFunction + + +class ProteinOperator: + + def __init__(self): + + self.real_names = { + "A": "Ala", + "R": "Arg", + "N": "Asn", + "D": "Asp", + "C": "Cys", + "Q": "Gln", + "E": "Glu", + "G": "Gly", + "H": "His", + "I": "Iso", + "L": "Leu", + "K": "Lys", + "M": "Met", + "F": "Phe", + "P": "Pro", + "S": "Ser", + "T": "Thr", + "W": "Trp", + "Y": "Tyr", + "V": "Val", + "B": "Asx", + } + + self.dictionary = { + "A": 0, + "R": 1, + "N": 2, + "D": 3, + "C": 4, + "Q": 5, + "E": 6, + "G": 7, + "H": 8, + "I": 9, + "L": 10, + "K": 11, + "M": 12, + "F": 13, + "P": 14, + "S": 15, + "T": 16, + "W": 17, + "Y": 18, + "V": 19, + "B": 3, + } + + self.inv_dictionary = {v: k for k, v in self.dictionary.items()} + + self.inv_real_names = {v: k for k, v in self.real_names.items()} + + self.Negative = ["D", "E"] + self.Positive = ["R", "K", "H"] + self.Aromatic = ["F", "W", "Y", "H"] + self.Polar = ["N", "Q", "S", "T", "Y"] + self.Aliphatic = ["A", "G", "I", "L", "V"] + self.Amide = ["N", "Q"] + self.Sulfur = ["C", "M"] + self.Hydroxil = ["S", "T"] + self.Small = ["A", "S", "T", "P", "G", "V"] + self.Medium = ["M", "L", "I", "C", "N", "Q", "K", "D", "E"] + self.Large = ["R", "H", "W", "F", "Y"] + self.Hydro = ["M", "L", "I", "V", "A"] + self.Cyclic = ["P"] + self.Random = ["F", "W", "L", "S", "D"] + + def translate(self, X): + f = lambda x: self.dictionary[x] + Y = np.zeros(shape=X.shape).astype(int) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + Y[i, j] = f(X[i, j]) + return Y + + def remove_wild_type_mutations(self, mutation): + mutation_split = mutation.split("+") + output = [] + for mut in mutation_split: + if mut[0] != mut[-1]: + output.append(mut) + return "+".join(output) + + def get_variant_code(self, mutation): + mutation_split = mutation.split("+") + return "".join([mut[-1] for mut in mutation_split]) + + def get_substitutes_from_mutation(self, mutation): + mutation_split = mutation.split("+") + original = [] + new = [] + positions = [] + + for mut in mutation_split: + original.append(mut[0]) + new.append(mut[-1]) + positions.append(int(mut[1:-1])) + + return (original, new, positions) + + def mutation(self, original_seq, positions, new_seq): + old_seq = list(original_seq) + new_seq = list(new_seq) + identifier = [] + for old, new, position in zip(old_seq, new_seq, positions): + if old != new: + identifier.append(old + str(position) + new) + return "+".join(identifier) + + def interval_number(self, dim=None): + if dim is None: + dim = self.dim + arr = self.interval_letters(dim=dim) + out = self.translate(arr) + return out + + def interval_onehot(self, dim=None): + if dim is None: + dim = self.dim + arr = self.interval_letters(dim=dim) + out = self.translate_one_hot(arr) + return out + + def interval_letters(self, dim=None): + if dim is None: + dim = self.dim + + names = list(self.dictionary.keys()) + names.remove("B") + arr = [] + for i in range(dim): + arr.append(names) + out = helper.cartesian(arr) + return out + + def translate_amino_acid(self, letter): + return self.dictionary[letter] + + def translate_mutation_series(self, series): + f = lambda x: np.array( + list(map(int, [self.dictionary[a] for a in list(str(x))])) + ).reshape(-1, 1) + xtest = np.concatenate(series.apply(f).values, axis=1).T + return xtest + + def translate_one_hot(self, X): + try: + Y = self.translate(X) + except: + Y = X + n, d = 
list(X.shape) + Z = np.zeros(shape=(n, d * self.total)) + for i in range(n): + for j in range(d): + Z[i, Y[i, j] + j * self.total] = 1.0 + + return Z + + def get_real_name(self, name): + out = [] + for i in name: + out.append(self.real_names[i]) + return out + + +class ProteinBenchmark: + + def __init__(self, fname, dim=1, ref=["D", "D", "D", "D"], avg=False, scale=True): + """ + initialize the protein benchmark + + fname : dataset name + dim : dimension of the dataset + ref : for smaller dimensions what is the reference in the 4 dim space? + avg : average the effect over other combinations in lower dimensions + """ + + """ Convention of the following dictionary is to map B->D as B can stand for N and D. """ - self.dictionary = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7, - 'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13, - 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19, 'B': 3} - - f = lambda x: self.dictionary[x] - - self.total = 20 - self.dim = dim - self.ref = ref - self.ref_translated = [f(x) for x in self.ref] - - dset = pd.read_hdf(fname) - - # average the effect over others - if avg == False: - mask = np.full(dset.shape[0], True, dtype=bool) - for j in range(4 - dim): - mask = np.logical_and(mask, dset["P" + str(4 - j)] == ref[3 - j]) - self.data = dset[mask] - else: - # avg. not implemented - pass - - if scale == True: - maximum = np.max(self.data[:]['Fitness']) - self.data[:]['Fitness'] = self.data[:]['Fitness'] / maximum - else: - pass - - self.real_names = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', 'Q': 'Gln', 'E': 'Glu', - 'G': 'Gly', - 'H': 'His', 'I': 'Iso', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', - 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val', 'B': 'Asx'} - - self.inv_real_names = {v: k for k, v in self.real_names.items()} - - self.Negative = ['D', 'E'] - self.Positive = ['R', 'K', 'H'] - self.Aromatic = ['F', 'W', 'Y', 'H'] - self.Polar = ['N', 'Q', 'S', 'T', 'Y'] - self.Aliphatic = ['A', 'G', 'I', 'L', 'V'] - self.Amide = ['N', 'Q'] - self.Sulfur = ['C', 'M'] - self.Hydroxil = ['S', 'T'] - self.Small = ['A', 'S', 'T', 'P', 'G', 'V'] - self.Medium = ['M', 'L', 'I', 'C', 'N', 'Q', 'K', 'D', 'E'] - self.Large = ['R', 'H', 'W', 'F', 'Y'] - self.Hydro = ['M', 'L', 'I', 'V', 'A'] - self.Cyclic = ['P'] - self.Random = ['F', 'W', 'L', 'S', 'D'] - - def get_real_name(self, name): - out = [] - for i in name: - out.append(self.real_names[i]) - return out - - def data_summary(self): - y = self.data['Fitness'].values - maximum = np.max(y) - minimum = np.min(y) - return (maximum, minimum) - - def translate(self, X): - f = lambda x: self.dictionary[x] - Y = np.zeros(shape=X.shape).astype(int) - for i in range(X.shape[0]): - for j in range(X.shape[1]): - Y[i, j] = f(X[i, j]) - return Y - - def translate_one_hot(self, X): - try: - Y = self.translate(X) - except: - Y = X - n, d = list(X.shape) - Z = np.zeros(shape=(n, d * self.total)) - for i in range(n): - for j in range(d): - Z[i, Y[i, j] + j * self.total] = 1.0 - - return Z - - def self_translate(self): - """ - self translate from - :return: - """ - f = lambda x: self.dictionary[x] - for j in range(4): - self.data['P' + str(j + 1)] = self.data['P' + str(j + 1)].apply(f) - - def set_fidelity(self, F): - self.Fidelity = F - - def scale(self): - self.scale = 1 - - def eval_noiseless(self, X): - """ - evaluate depends on the dimension - """ - res = [] - - # append - n = X.shape[0] - C = np.tile(self.ref_translated[self.dim:4], (n, 1)) - X_ = np.concatenate((X, 
C), axis=1) - for i in range(n): - x = X_[i, :] - mask = np.full(self.data.shape[0], True, dtype=bool) - for j in range(4): - # print (x[j],self.data["P" + str(j + 1)]) - mask = np.logical_and(mask, self.data["P" + str(j + 1)] == x[j]) - res.append(self.data[mask]['Fitness'].values) - return np.array(res).reshape(-1, 1) - - # def actions(self): - # number_of_actions = self.dim*(20**(self.dim-1)) - # - # actions = [] - # - # ## this includes (20,d) actions - # one_dim = self.interval_onehot(dim = 1) - # #print (one_dim) - # #print ("one dim",one_dim.shape) - # if self.dim - 1>0: - # # this includes (20**(d-1), d) actions - # others = self.interval_onehot(dim = self.dim - 1) - # #print ("others:", others.shape) - # for fix_dim in range(self.dim): - # #print (fix_dim) - # action = np.zeros(shape=(20 ** (self.dim - 1), 20 * self.dim)) - # for elem in one_dim: - # #print (fix_dim*20+(fix_dim+1)*20) - # action[:,fix_dim*20:(fix_dim+1)*20]=elem - # action[:,0:fix_dim*20] = others[:,0:fix_dim*20] - # action[:,(fix_dim+1) * 20:] = others[:,fix_dim*20:] - # actions.append(action) - # return actions - # else: - # return one_dim - - def actions(self): - number_of_actions = self.dim * (20 ** (self.dim - 1)) - - actions = [] - - ## this includes (20,d) actions - one_dim = self.interval_onehot(dim=1) - # print (one_dim) - # print ("one dim",one_dim.shape) - if self.dim - 1 > 0: - # this includes (20**(d-1), d) actions - others = self.interval_onehot(dim=self.dim - 1) - # print ("others:", others.shape) - for elem in others: - for fix_dim in range(self.dim): - action = np.zeros(shape=(20, 20 * self.dim)) - action[:, fix_dim * 20:(fix_dim + 1) * 20] = one_dim - j = 0 - for i in range(self.dim): - if i != fix_dim: - action[:, i * 20:(i + 1) * 20] = elem[j * 20:(j + 1) * 20] - j = j + 1 - - actions.append(action) - return actions - else: - return one_dim - - def subsample_dts_indice_only(self, N, split=0.9): - self.self_translate() - xtest = self.interval_onehot() - - indices = np.arange(0, N, 1) - sample = indices - np.random.shuffle(indices) - - train = sample[0:int(np.round(split * N))] - test = sample[int(np.round(split * N)):N] - - return (train, test) - - def subsample_dts(self, N, split=0.90): - self.self_translate() - xtest = self.interval_onehot() - indices = np.arange(0, N, 1) - - indices = np.random.shuffle(indices) - sample = xtest[indices, :] - - y_sample = self.eval_one_hot(sample) - - x_train = sample[0:int(np.round(split * N)), :] - y_train = y_sample[0:int(np.round(split * N)), :] - x_test = sample[int(np.round(split * N)):N, :] - y_test = y_sample[int(np.round(split * N)):N, :] - - return (x_train, y_train, x_test, y_test) - - def eval_fidelity(self, X): - return self.Fidelity(X) - - def eval(self, X): - z = self.eval_noiseless(X) - return z - - def eval_one_hot(self, X): - n, d = list(X.shape) - Z = np.zeros(shape=(n, self.dim)) - for i in range(n): - for j in range(d): - if X[i, j] > 0: - Z[i, j // self.total] = j % self.total - Z = Z.astype(int) - Y = self.eval(Z) - return Y - - def plot_one_site_map(self, kernel, save=None, dim=1): - plt.figure() - names = list(self.dictionary.keys()) - names.remove('B') - real_names = self.get_real_name(names) - real_names = helper.cartesian([real_names for i in range(dim)]) - - xtest = torch.from_numpy(self.interval_onehot(dim=dim)) - real_names = [','.join(list(i)) for i in real_names] - ax = plt.imshow(kernel(xtest, xtest).detach().numpy()) - plt.colorbar() - plt.xticks(range(xtest.shape[0]), real_names, fontsize=10, rotation=60) - 
plt.yticks(range(xtest.shape[0]), real_names, fontsize=10) - plt.margins(0.2) - if save is not None: - plt.savefig(save) - else: - plt.show() + self.dictionary = { + "A": 0, + "R": 1, + "N": 2, + "D": 3, + "C": 4, + "Q": 5, + "E": 6, + "G": 7, + "H": 8, + "I": 9, + "L": 10, + "K": 11, + "M": 12, + "F": 13, + "P": 14, + "S": 15, + "T": 16, + "W": 17, + "Y": 18, + "V": 19, + "B": 3, + } + + f = lambda x: self.dictionary[x] + + self.total = 20 + self.dim = dim + self.ref = ref + self.ref_translated = [f(x) for x in self.ref] + + dset = pd.read_hdf(fname) + + # average the effect over others + if avg == False: + mask = np.full(dset.shape[0], True, dtype=bool) + for j in range(4 - dim): + mask = np.logical_and(mask, dset["P" + str(4 - j)] == ref[3 - j]) + self.data = dset[mask] + else: + # avg. not implemented + pass + + if scale == True: + maximum = np.max(self.data[:]["Fitness"]) + self.data[:]["Fitness"] = self.data[:]["Fitness"] / maximum + else: + pass + + self.real_names = { + "A": "Ala", + "R": "Arg", + "N": "Asn", + "D": "Asp", + "C": "Cys", + "Q": "Gln", + "E": "Glu", + "G": "Gly", + "H": "His", + "I": "Iso", + "L": "Leu", + "K": "Lys", + "M": "Met", + "F": "Phe", + "P": "Pro", + "S": "Ser", + "T": "Thr", + "W": "Trp", + "Y": "Tyr", + "V": "Val", + "B": "Asx", + } + + self.inv_real_names = {v: k for k, v in self.real_names.items()} + + self.Negative = ["D", "E"] + self.Positive = ["R", "K", "H"] + self.Aromatic = ["F", "W", "Y", "H"] + self.Polar = ["N", "Q", "S", "T", "Y"] + self.Aliphatic = ["A", "G", "I", "L", "V"] + self.Amide = ["N", "Q"] + self.Sulfur = ["C", "M"] + self.Hydroxil = ["S", "T"] + self.Small = ["A", "S", "T", "P", "G", "V"] + self.Medium = ["M", "L", "I", "C", "N", "Q", "K", "D", "E"] + self.Large = ["R", "H", "W", "F", "Y"] + self.Hydro = ["M", "L", "I", "V", "A"] + self.Cyclic = ["P"] + self.Random = ["F", "W", "L", "S", "D"] + + def get_real_name(self, name): + out = [] + for i in name: + out.append(self.real_names[i]) + return out + + def data_summary(self): + y = self.data["Fitness"].values + maximum = np.max(y) + minimum = np.min(y) + return (maximum, minimum) + + def translate(self, X): + f = lambda x: self.dictionary[x] + Y = np.zeros(shape=X.shape).astype(int) + for i in range(X.shape[0]): + for j in range(X.shape[1]): + Y[i, j] = f(X[i, j]) + return Y + + def translate_one_hot(self, X): + try: + Y = self.translate(X) + except: + Y = X + n, d = list(X.shape) + Z = np.zeros(shape=(n, d * self.total)) + for i in range(n): + for j in range(d): + Z[i, Y[i, j] + j * self.total] = 1.0 + + return Z + + def self_translate(self): + """ + self translate from + :return: + """ + f = lambda x: self.dictionary[x] + for j in range(4): + self.data["P" + str(j + 1)] = self.data["P" + str(j + 1)].apply(f) + + def set_fidelity(self, F): + self.Fidelity = F + + def scale(self): + self.scale = 1 + + def eval_noiseless(self, X): + """ + evaluate depends on the dimension + """ + res = [] + + # append + n = X.shape[0] + C = np.tile(self.ref_translated[self.dim : 4], (n, 1)) + X_ = np.concatenate((X, C), axis=1) + for i in range(n): + x = X_[i, :] + mask = np.full(self.data.shape[0], True, dtype=bool) + for j in range(4): + # print (x[j],self.data["P" + str(j + 1)]) + mask = np.logical_and(mask, self.data["P" + str(j + 1)] == x[j]) + res.append(self.data[mask]["Fitness"].values) + return np.array(res).reshape(-1, 1) + + # def actions(self): + # number_of_actions = self.dim*(20**(self.dim-1)) + # + # actions = [] + # + # ## this includes (20,d) actions + # one_dim = 
self.interval_onehot(dim = 1) + # #print (one_dim) + # #print ("one dim",one_dim.shape) + # if self.dim - 1>0: + # # this includes (20**(d-1), d) actions + # others = self.interval_onehot(dim = self.dim - 1) + # #print ("others:", others.shape) + # for fix_dim in range(self.dim): + # #print (fix_dim) + # action = np.zeros(shape=(20 ** (self.dim - 1), 20 * self.dim)) + # for elem in one_dim: + # #print (fix_dim*20+(fix_dim+1)*20) + # action[:,fix_dim*20:(fix_dim+1)*20]=elem + # action[:,0:fix_dim*20] = others[:,0:fix_dim*20] + # action[:,(fix_dim+1) * 20:] = others[:,fix_dim*20:] + # actions.append(action) + # return actions + # else: + # return one_dim + + def actions(self): + number_of_actions = self.dim * (20 ** (self.dim - 1)) + + actions = [] + + ## this includes (20,d) actions + one_dim = self.interval_onehot(dim=1) + # print (one_dim) + # print ("one dim",one_dim.shape) + if self.dim - 1 > 0: + # this includes (20**(d-1), d) actions + others = self.interval_onehot(dim=self.dim - 1) + # print ("others:", others.shape) + for elem in others: + for fix_dim in range(self.dim): + action = np.zeros(shape=(20, 20 * self.dim)) + action[:, fix_dim * 20 : (fix_dim + 1) * 20] = one_dim + j = 0 + for i in range(self.dim): + if i != fix_dim: + action[:, i * 20 : (i + 1) * 20] = elem[ + j * 20 : (j + 1) * 20 + ] + j = j + 1 + + actions.append(action) + return actions + else: + return one_dim + + def subsample_dts_indice_only(self, N, split=0.9): + self.self_translate() + xtest = self.interval_onehot() + + indices = np.arange(0, N, 1) + sample = indices + np.random.shuffle(indices) + + train = sample[0 : int(np.round(split * N))] + test = sample[int(np.round(split * N)) : N] + + return (train, test) + + def subsample_dts(self, N, split=0.90): + self.self_translate() + xtest = self.interval_onehot() + indices = np.arange(0, N, 1) + + indices = np.random.shuffle(indices) + sample = xtest[indices, :] + + y_sample = self.eval_one_hot(sample) + + x_train = sample[0 : int(np.round(split * N)), :] + y_train = y_sample[0 : int(np.round(split * N)), :] + x_test = sample[int(np.round(split * N)) : N, :] + y_test = y_sample[int(np.round(split * N)) : N, :] + + return (x_train, y_train, x_test, y_test) + + def eval_fidelity(self, X): + return self.Fidelity(X) + + def eval(self, X): + z = self.eval_noiseless(X) + return z + + def eval_one_hot(self, X): + n, d = list(X.shape) + Z = np.zeros(shape=(n, self.dim)) + for i in range(n): + for j in range(d): + if X[i, j] > 0: + Z[i, j // self.total] = j % self.total + Z = Z.astype(int) + Y = self.eval(Z) + return Y + + def plot_one_site_map(self, kernel, save=None, dim=1): + plt.figure() + names = list(self.dictionary.keys()) + names.remove("B") + real_names = self.get_real_name(names) + real_names = helper.cartesian([real_names for i in range(dim)]) + + xtest = torch.from_numpy(self.interval_onehot(dim=dim)) + real_names = [",".join(list(i)) for i in real_names] + ax = plt.imshow(kernel(xtest, xtest).detach().numpy()) + plt.colorbar() + plt.xticks(range(xtest.shape[0]), real_names, fontsize=10, rotation=60) + plt.yticks(range(xtest.shape[0]), real_names, fontsize=10) + plt.margins(0.2) + if save is not None: + plt.savefig(save) + else: + plt.show() if __name__ == "__main__": - Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=2, ref=['A', 'B', 'C', 'D']) - # print (Benchmark.data) - Benchmark.self_translate() - Benchmark.data.plot.scatter(x='P1', y='P2', c=Benchmark.data['Fitness'], s=200) - # print (Benchmark.data) - X = np.array([['F', 'C'], ['D', 'C']]) - X_ 
= Benchmark.translate(X) - print(X, X_) - X__ = Benchmark.translate_one_hot(X) + Benchmark = ProteinBenchmark("protein_data_gb1.h5", dim=2, ref=["A", "B", "C", "D"]) + # print (Benchmark.data) + Benchmark.self_translate() + Benchmark.data.plot.scatter(x="P1", y="P2", c=Benchmark.data["Fitness"], s=200) + # print (Benchmark.data) + X = np.array([["F", "C"], ["D", "C"]]) + X_ = Benchmark.translate(X) + print(X, X_) + X__ = Benchmark.translate_one_hot(X) - print(Benchmark.translate_one_hot(X)) + print(Benchmark.translate_one_hot(X)) - print(Benchmark.eval(X_)) + print(Benchmark.eval(X_)) - print(Benchmark.eval_one_hot(X__)) + print(Benchmark.eval_one_hot(X__)) diff --git a/stpy/test_functions/swissfel_simulator.py b/stpy/test_functions/swissfel_simulator.py index f7eaec1..f82c51d 100755 --- a/stpy/test_functions/swissfel_simulator.py +++ b/stpy/test_functions/swissfel_simulator.py @@ -5,116 +5,123 @@ from stpy.helpers.helper import * -class FelSimulator(): - - def __init__(self, d, sigma, name): - self.d = d - self.sigma = sigma - self.exp_name = name - - def help(self, reload=False): - print("Help for the FelSimulator") - - def load_pickle(self, file_name): - - self.GP = pickle.load(open(file_name, "rb")) - self.d = self.GP.d - self.exp_name = self.GP.exp_name - - def save(self, file_name): - self.GP.exp_name = self.exp_name - pickle.dump(self.GP, open(file_name, "wb"), -1) - - def load_fresh(self, file_name, dts='1'): - f = File(file_name, 'r') - dset = f[dts] - print(dset) - n = dset[str("x")].shape[0] - mask = np.full(n, False, dtype=bool) - for j in range(self.d): - maskNew = dset["line_id"] == j - mask = np.logical_or(mask, maskNew) - print("Using ", np.sum(mask), "points to fit the model.") - self.x = dset["x"][mask, 0:self.d].reshape(-1, self.d) - self.y = dset["y"][mask].reshape(-1, 1) - # y response and scale, x scale to [-0.5,0.5] - scale = np.max(np.abs(self.y)) - self.y = self.y / scale - for j in range(self.d): - a = np.min(self.x[:, j]) - b = np.max(self.x[:, j]) - self.x[:, j] = (self.x[:, j] / (b - a)) - 0.5 - a / (b - a) - # noise structure - self.s = np.max(dset["y_std"][mask] / scale) - print("The noise level estimated to be:", self.s) - self.x = torch.from_numpy(self.x) - self.y = torch.from_numpy(self.y) - - f.close() - - def fit_simulator(self, GP, optimize="bandwidth", restarts=10): - self.GP = GP - self.GP.s = self.s - self.GP.fit(self.x, self.y) - print("Model fitted.") - self.GP.optimize_params(type=optimize, restarts=restarts) - self.GP.back_prop = True - - def bounds(self, N, n): - x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.GP.d))) - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(self.GP.d)] - xtest = cartesian(arrays) - xtest = torch.from_numpy(xtest) - return (x, xtest, self.GP.d, None) - - def opt_bounds(self): - bounds = tuple([(-0.5, 0.5) for i in range(self.GP.d)]) - return bounds - - def constraint(self, X): - return True - - def eval(self, X, sigma=None): - if sigma is None: - sigma = self.sigma - [mu, _] = self.GP.mean_std(X) - return mu + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64) - - def eval_sample(self, X, sigma=None): - if sigma is None: - sigma = self.sigma - f = self.GP.sample(X) - self.x = torch.cat((self.x, X), dim=0) - self.y = torch.cat((self.y, f), dim=0) - self.GP.fit(self.x, self.y) - return f - - def optimum(self): - ## find optimum using backpropagation optimize eval_sample given X - x = torch.randn(self.d, 1, requires_grad=True) - x0 = x - - from 
scipy.optimize import minimize - - def fun(x): - x = np.array([x]) - return -self.eval(torch.from_numpy(x)).numpy()[0][0] - - def grad(x): - z = torch.from_numpy(np.array([x])) - z.requires_grad_(True) - y = -self.eval(z) - y.backward() - return z.grad.numpy()[0] - - mybounds = self.opt_bounds() - res = minimize(fun, x0.detach().numpy(), method="L-BFGS-B", jac=grad, tol=0.0001, bounds=mybounds) - solution = res.x - - val = self.eval(torch.from_numpy(solution).unsqueeze(0)) - loc = torch.from_numpy(solution).unsqueeze(0) - - return (val, loc) +class FelSimulator: + + def __init__(self, d, sigma, name): + self.d = d + self.sigma = sigma + self.exp_name = name + + def help(self, reload=False): + print("Help for the FelSimulator") + + def load_pickle(self, file_name): + + self.GP = pickle.load(open(file_name, "rb")) + self.d = self.GP.d + self.exp_name = self.GP.exp_name + + def save(self, file_name): + self.GP.exp_name = self.exp_name + pickle.dump(self.GP, open(file_name, "wb"), -1) + + def load_fresh(self, file_name, dts="1"): + f = File(file_name, "r") + dset = f[dts] + print(dset) + n = dset[str("x")].shape[0] + mask = np.full(n, False, dtype=bool) + for j in range(self.d): + maskNew = dset["line_id"] == j + mask = np.logical_or(mask, maskNew) + print("Using ", np.sum(mask), "points to fit the model.") + self.x = dset["x"][mask, 0 : self.d].reshape(-1, self.d) + self.y = dset["y"][mask].reshape(-1, 1) + # y response and scale, x scale to [-0.5,0.5] + scale = np.max(np.abs(self.y)) + self.y = self.y / scale + for j in range(self.d): + a = np.min(self.x[:, j]) + b = np.max(self.x[:, j]) + self.x[:, j] = (self.x[:, j] / (b - a)) - 0.5 - a / (b - a) + # noise structure + self.s = np.max(dset["y_std"][mask] / scale) + print("The noise level estimated to be:", self.s) + self.x = torch.from_numpy(self.x) + self.y = torch.from_numpy(self.y) + + f.close() + + def fit_simulator(self, GP, optimize="bandwidth", restarts=10): + self.GP = GP + self.GP.s = self.s + self.GP.fit(self.x, self.y) + print("Model fitted.") + self.GP.optimize_params(type=optimize, restarts=restarts) + self.GP.back_prop = True + + def bounds(self, N, n): + x = torch.from_numpy(np.random.uniform(-0.5, 0.5, size=(N, self.GP.d))) + if n == None: + xtest = None + else: + arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(self.GP.d)] + xtest = cartesian(arrays) + xtest = torch.from_numpy(xtest) + return (x, xtest, self.GP.d, None) + + def opt_bounds(self): + bounds = tuple([(-0.5, 0.5) for i in range(self.GP.d)]) + return bounds + + def constraint(self, X): + return True + + def eval(self, X, sigma=None): + if sigma is None: + sigma = self.sigma + [mu, _] = self.GP.mean_std(X) + return mu + sigma * torch.randn(X.size()[0], 1, dtype=torch.float64) + + def eval_sample(self, X, sigma=None): + if sigma is None: + sigma = self.sigma + f = self.GP.sample(X) + self.x = torch.cat((self.x, X), dim=0) + self.y = torch.cat((self.y, f), dim=0) + self.GP.fit(self.x, self.y) + return f + + def optimum(self): + ## find optimum using backpropagation optimize eval_sample given X + x = torch.randn(self.d, 1, requires_grad=True) + x0 = x + + from scipy.optimize import minimize + + def fun(x): + x = np.array([x]) + return -self.eval(torch.from_numpy(x)).numpy()[0][0] + + def grad(x): + z = torch.from_numpy(np.array([x])) + z.requires_grad_(True) + y = -self.eval(z) + y.backward() + return z.grad.numpy()[0] + + mybounds = self.opt_bounds() + res = minimize( + fun, + x0.detach().numpy(), + method="L-BFGS-B", + jac=grad, + tol=0.0001, + 
bounds=mybounds, + ) + solution = res.x + + val = self.eval(torch.from_numpy(solution).unsqueeze(0)) + loc = torch.from_numpy(solution).unsqueeze(0) + + return (val, loc) diff --git a/stpy/test_functions/test_functions.py b/stpy/test_functions/test_functions.py index 11e6fed..84121ab 100755 --- a/stpy/test_functions/test_functions.py +++ b/stpy/test_functions/test_functions.py @@ -4,677 +4,792 @@ import stpy import stpy.continuous_processes.gauss_procc + # from tensorflow.examples.tutorials.mnist import input_data from stpy.helpers.helper import * from stpy.test_functions.neural_net import train_network def isin(element, test_elements, assume_unique=False): - (n, d) = element.shape - (m, d) = test_elements.shape - maskFull = np.full((n), False, dtype=bool) - for j in range(m): - mask = np.full((n), True, dtype=bool) - for i in range(d): - # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique)) - mask = np.logical_and(mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-01)) - # print (j, i, mask) - maskFull = np.logical_or(mask, maskFull) - # print (maskFull) - return maskFull + (n, d) = element.shape + (m, d) = test_elements.shape + maskFull = np.full((n), False, dtype=bool) + for j in range(m): + mask = np.full((n), True, dtype=bool) + for i in range(d): + # mask = np.logical_and(mask,np.in1d(element[:,i],test_elements[j,i], assume_unique=assume_unique)) + mask = np.logical_and( + mask, np.isclose(element[:, i], test_elements[j, i], atol=1e-01) + ) + # print (j, i, mask) + maskFull = np.logical_or(mask, maskFull) + # print (maskFull) + return maskFull class test_function: - def __init__(self): - "nothing" - self.sampled = False - self.init = False - self.scale = 1.0 - - ## General F - def f(self, X, sigma=0.00001, a=0.5): - # in X rows are points, cols are features - X = X * 8 - y = -np.sin(a * np.sum(X ** 2, axis=1)).reshape(X.shape[0], 1) - y = y + sigma * np.random.randn(X.shape[0], 1) - return y - - def f_bounds(self, N, n, d=1, L_infinity_ball=1.): - x = np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - return (d, xtest, x, 0.15) - - def f_opt_bounds(self, d=1, L_infinity_ball=1): - b = tuple([(-L_infinity_ball, L_infinity_ball) for i in range(d)]) - return b - - def optimize_f(self, d=1, a=0.5, L_infinity_ball=1): - from scipy.optimize import minimize - - grad = lambda x: np.cos(np.sum(x ** 2) / 2) * x - fun = lambda x: np.sin(np.sum(x ** 2) / 2) + 1 - - bounds = self.f_opt_bounds(d=d, L_infinity_ball=L_infinity_ball) - r = [] - for _ in range(500): - (d, _, x, _) = self.f_bounds(1, None, d=d, L_infinity_ball=L_infinity_ball) - x0 = x[0, :] - res = minimize(fun, x0, method="SLSQP", jac=grad, tol=0.0001, bounds=bounds) - r.append(fun(res.x)) - - print(d, max(r)) - - def sample_ss(self, X, sigma=0.001, gamma=1.0, GP=None): - # in X rows are points, cols are features - if self.sampled == False: - # print ("sampling") - if GP == None: - GP = stpy.continuous_processes.gauss_procc.GaussianProcess(s=sigma, gamma=gamma) - self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy() - mask = isin(self.xtest, X) - self.sampled = True - return self.sample[mask, :].numpy() + np.random.randn(X.shape[0], 1) * sigma - else: - self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy() - mask = isin(self.xtest, X) - self.sampled = True - return 
self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma - else: - mask = isin(self.xtest, X) - return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma - - def sample_ss_bounds(self, N, n, d=1, L_infinity_ball=1., gamma=1.0): - # self.sampled = False - # grid - arrays = [np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - self.xtest = xtest - self.n = n - # x = self.xtest[np.random.randint(0,n,size = N),:] - x = self.xtest[np.random.permutation(np.arange(0, self.xtest.shape[0], 1))[0:N], :] - x = np.sort(x, axis=0) - return (d, xtest, x, gamma) - - def sample_ss_reset(self): - self.samples = False - - def optimize(self, xtest, ytest, groups, s): - (n, d) = xtest.size() - kernel = stpy.kernels.KernelFunction(kernel_name="ard", gamma=torch.ones(d, dtype=torch.float64) * 0.1, - groups=groups) - GP = stpy.continuous_processes.gauss_procc.GaussianProcess(kernel_custom=kernel, s=s, d=d) - GP.fit_gp(xtest, ytest) - GP.optimize_params(type="bandwidth") - print("Optimized") - return torch.min(kernel.gamma) - - ## Branin Function - def branin(self, X, sigma=0.1): - if X.shape[1] != 2: - raise AssertionError("Invalid dimension of grid with Branin Function") - else: - xx = X[:, 0] - yy = X[:, 1] - y = ((yy - (5.1 / (4. * np.pi)) * (xx ** 2) + 5. / np.pi - 6.) ** 2 + 10. * ( - 1. - 1. / (8. * np.pi)) * np.cos(xx) + 10.) / 150 - y = -y.reshape(X.shape[0], 1) - return y - - def branin_bounds(self, N, n): - x = np.random.uniform(0, 10, size=(N, 2)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(-5, 10, n).reshape(n, 1), np.linspace(0, 15, n).reshape(n, 1)] - xtest = cartesian(arrays) - return (2, xtest, x, 2.5) - - def branin_opt_bounds(self): - b = tuple([(-5, 10), (0, 15)]) - return b - - ## Camelback Function - def camelback(self, X, sigma=0.1): - if X.shape[1] != 2: - raise AssertionError("Invalid dimension of grid with Branin Function") - else: - xx = X[:, 0] * 4 - yy = X[:, 1] * 2 - y = (4. - 2.1 * xx ** 2 + (xx ** 4) / 3.) * (xx ** 2) + xx * yy + (-4. + 4 * (yy ** 2)) * (yy ** 2) - y = -y.reshape(X.shape[0], 1) - # y = np.tanh(y) - y = y / 5. 
- return y / self.scale + sigma * np.random.randn(X.shape[0], 1) - - def camelback_bounds(self, N, n, adv_inv=False): - if adv_inv == False: - x = np.random.uniform(-0.5, 0.5, size=(N, 2)) - else: - x = np.random.uniform(-0.5, -0.4, size=(N, 2)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1), np.linspace(-0.5, 0.5, n).reshape(n, 1)] - xtest = cartesian(arrays) - return (2, xtest, x, 0.1) - - def camelback_opt_bounds(self): - b = tuple([(-0.5, 0.5), (-0.5, 0.5)]) - return b - - def camelback_scale(self, xtest): - self.scale = np.max((self.camelback(xtest, sigma=0))) - print("Scaling:", self.scale) - - ## Hartmann 6 - def hartmann6(self, X, sigma=0.1): - if X.shape[1] != 6: - raise AssertionError("Invalid dimension of grid with Branin Function") - else: - # opt = np.array([[0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]]) - # fopt = np.array([[-3.32237]]) - - alpha = [1.00, 1.20, 3.00, 3.20] - A = np.array([[10.00, 3.00, 17.00, 3.50, 1.70, 8.00], - [0.05, 10.00, 17.00, 0.10, 8.00, 14.00], - [3.00, 3.50, 1.70, 10.00, 17.00, 8.00], - [17.00, 8.00, 0.05, 10.00, 0.10, 14.00]]) - P = 0.0001 * np.array([[1312, 1696, 5569, 124, 8283, 5886], - [2329, 4135, 8307, 3736, 1004, 9991], - [2348, 1451, 3522, 2883, 3047, 6650], - [4047, 8828, 8732, 5743, 1091, 381]]) - - """6d Hartmann test function + def __init__(self): + "nothing" + self.sampled = False + self.init = False + self.scale = 1.0 + + ## General F + def f(self, X, sigma=0.00001, a=0.5): + # in X rows are points, cols are features + X = X * 8 + y = -np.sin(a * np.sum(X**2, axis=1)).reshape(X.shape[0], 1) + y = y + sigma * np.random.randn(X.shape[0], 1) + return y + + def f_bounds(self, N, n, d=1, L_infinity_ball=1.0): + x = np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)) + # grid + if n == None: + xtest = None + else: + arrays = [ + np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) + for i in range(d) + ] + xtest = cartesian(arrays) + return (d, xtest, x, 0.15) + + def f_opt_bounds(self, d=1, L_infinity_ball=1): + b = tuple([(-L_infinity_ball, L_infinity_ball) for i in range(d)]) + return b + + def optimize_f(self, d=1, a=0.5, L_infinity_ball=1): + from scipy.optimize import minimize + + grad = lambda x: np.cos(np.sum(x**2) / 2) * x + fun = lambda x: np.sin(np.sum(x**2) / 2) + 1 + + bounds = self.f_opt_bounds(d=d, L_infinity_ball=L_infinity_ball) + r = [] + for _ in range(500): + (d, _, x, _) = self.f_bounds(1, None, d=d, L_infinity_ball=L_infinity_ball) + x0 = x[0, :] + res = minimize(fun, x0, method="SLSQP", jac=grad, tol=0.0001, bounds=bounds) + r.append(fun(res.x)) + + print(d, max(r)) + + def sample_ss(self, X, sigma=0.001, gamma=1.0, GP=None): + # in X rows are points, cols are features + if self.sampled == False: + # print ("sampling") + if GP == None: + GP = stpy.continuous_processes.gauss_procc.GaussianProcess( + s=sigma, gamma=gamma + ) + self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy() + mask = isin(self.xtest, X) + self.sampled = True + return ( + self.sample[mask, :].numpy() + + np.random.randn(X.shape[0], 1) * sigma + ) + else: + self.sample = GP.sample(torch.from_numpy(self.xtest)).numpy() + mask = isin(self.xtest, X) + self.sampled = True + return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma + else: + mask = isin(self.xtest, X) + return self.sample[mask, :] + np.random.randn(X.shape[0], 1) * sigma + + def sample_ss_bounds(self, N, n, d=1, L_infinity_ball=1.0, gamma=1.0): + # self.sampled = False + # grid + 
arrays = [ + np.linspace(-L_infinity_ball, L_infinity_ball, n).reshape(n, 1) + for i in range(d) + ] + xtest = cartesian(arrays) + self.xtest = xtest + self.n = n + # x = self.xtest[np.random.randint(0,n,size = N),:] + x = self.xtest[ + np.random.permutation(np.arange(0, self.xtest.shape[0], 1))[0:N], : + ] + x = np.sort(x, axis=0) + return (d, xtest, x, gamma) + + def sample_ss_reset(self): + self.samples = False + + def optimize(self, xtest, ytest, groups, s): + (n, d) = xtest.size() + kernel = stpy.kernels.KernelFunction( + kernel_name="ard", + gamma=torch.ones(d, dtype=torch.float64) * 0.1, + groups=groups, + ) + GP = stpy.continuous_processes.gauss_procc.GaussianProcess( + kernel_custom=kernel, s=s, d=d + ) + GP.fit_gp(xtest, ytest) + GP.optimize_params(type="bandwidth") + print("Optimized") + return torch.min(kernel.gamma) + + ## Branin Function + def branin(self, X, sigma=0.1): + if X.shape[1] != 2: + raise AssertionError("Invalid dimension of grid with Branin Function") + else: + xx = X[:, 0] + yy = X[:, 1] + y = ( + (yy - (5.1 / (4.0 * np.pi)) * (xx**2) + 5.0 / np.pi - 6.0) ** 2 + + 10.0 * (1.0 - 1.0 / (8.0 * np.pi)) * np.cos(xx) + + 10.0 + ) / 150 + y = -y.reshape(X.shape[0], 1) + return y + + def branin_bounds(self, N, n): + x = np.random.uniform(0, 10, size=(N, 2)) + # grid + if n == None: + xtest = None + else: + arrays = [ + np.linspace(-5, 10, n).reshape(n, 1), + np.linspace(0, 15, n).reshape(n, 1), + ] + xtest = cartesian(arrays) + return (2, xtest, x, 2.5) + + def branin_opt_bounds(self): + b = tuple([(-5, 10), (0, 15)]) + return b + + ## Camelback Function + def camelback(self, X, sigma=0.1): + if X.shape[1] != 2: + raise AssertionError("Invalid dimension of grid with Branin Function") + else: + xx = X[:, 0] * 4 + yy = X[:, 1] * 2 + y = ( + (4.0 - 2.1 * xx**2 + (xx**4) / 3.0) * (xx**2) + + xx * yy + + (-4.0 + 4 * (yy**2)) * (yy**2) + ) + y = -y.reshape(X.shape[0], 1) + # y = np.tanh(y) + y = y / 5.0 + return y / self.scale + sigma * np.random.randn(X.shape[0], 1) + + def camelback_bounds(self, N, n, adv_inv=False): + if adv_inv == False: + x = np.random.uniform(-0.5, 0.5, size=(N, 2)) + else: + x = np.random.uniform(-0.5, -0.4, size=(N, 2)) + # grid + if n == None: + xtest = None + else: + arrays = [ + np.linspace(-0.5, 0.5, n).reshape(n, 1), + np.linspace(-0.5, 0.5, n).reshape(n, 1), + ] + xtest = cartesian(arrays) + return (2, xtest, x, 0.1) + + def camelback_opt_bounds(self): + b = tuple([(-0.5, 0.5), (-0.5, 0.5)]) + return b + + def camelback_scale(self, xtest): + self.scale = np.max((self.camelback(xtest, sigma=0))) + print("Scaling:", self.scale) + + ## Hartmann 6 + def hartmann6(self, X, sigma=0.1): + if X.shape[1] != 6: + raise AssertionError("Invalid dimension of grid with Branin Function") + else: + # opt = np.array([[0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]]) + # fopt = np.array([[-3.32237]]) + + alpha = [1.00, 1.20, 3.00, 3.20] + A = np.array( + [ + [10.00, 3.00, 17.00, 3.50, 1.70, 8.00], + [0.05, 10.00, 17.00, 0.10, 8.00, 14.00], + [3.00, 3.50, 1.70, 10.00, 17.00, 8.00], + [17.00, 8.00, 0.05, 10.00, 0.10, 14.00], + ] + ) + P = 0.0001 * np.array( + [ + [1312, 1696, 5569, 124, 8283, 5886], + [2329, 4135, 8307, 3736, 1004, 9991], + [2348, 1451, 3522, 2883, 3047, 6650], + [4047, 8828, 8732, 5743, 1091, 381], + ] + ) + + """6d Hartmann test function input bounds: 0 <= xi <= 1, i = 1..6 global optimum: (0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573), min function value = -3.32237 """ - external_sum = 0 - for i in range(4): - internal_sum = 
0 - for j in range(6): - internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 - external_sum = external_sum + alpha[i] * np.exp(-internal_sum) - - return external_sum[:, np.newaxis] - - def hartmann6_bounds(self, N, n): - x = np.random.uniform(0, 1, size=(N, 6)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(6)] - xtest = cartesian(arrays) - return (6, xtest, x, 0.5) - - def hartmann6_opt_bounds(self): - b = tuple([(0, 1) for i in range(6)]) - return b - - ## Hartmann 4 - def hartmann4(self, X, sigma=0.1): - if X.shape[1] != 4: - raise AssertionError("Invalid dimension of grid with Branin Function") - else: - - alpha = [1.00, 1.20, 3.00, 3.20] - - A = np.array([[10.00, 3.00, 17.00, 3.50, 1.70, 8.00], - [0.05, 10.00, 17.00, 0.10, 8.00, 14.00], - [3.00, 3.50, 1.70, 10.00, 17.00, 8.00], - [17.00, 8.00, 0.05, 10.00, 0.10, 14.00]]) - - P = 0.0001 * np.array([[1312, 1696, 5569, 124, 8283, 5886], - [2329, 4135, 8307, 3736, 1004, 9991], - [2348, 1451, 3522, 2883, 3047, 6650], - [4047, 8828, 8732, 5743, 1091, 381]]) - - """6d Hartmann test function + external_sum = 0 + for i in range(4): + internal_sum = 0 + for j in range(6): + internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 + external_sum = external_sum + alpha[i] * np.exp(-internal_sum) + + return external_sum[:, np.newaxis] + + def hartmann6_bounds(self, N, n): + x = np.random.uniform(0, 1, size=(N, 6)) + # grid + if n == None: + xtest = None + else: + arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(6)] + xtest = cartesian(arrays) + return (6, xtest, x, 0.5) + + def hartmann6_opt_bounds(self): + b = tuple([(0, 1) for i in range(6)]) + return b + + ## Hartmann 4 + def hartmann4(self, X, sigma=0.1): + if X.shape[1] != 4: + raise AssertionError("Invalid dimension of grid with Branin Function") + else: + + alpha = [1.00, 1.20, 3.00, 3.20] + + A = np.array( + [ + [10.00, 3.00, 17.00, 3.50, 1.70, 8.00], + [0.05, 10.00, 17.00, 0.10, 8.00, 14.00], + [3.00, 3.50, 1.70, 10.00, 17.00, 8.00], + [17.00, 8.00, 0.05, 10.00, 0.10, 14.00], + ] + ) + + P = 0.0001 * np.array( + [ + [1312, 1696, 5569, 124, 8283, 5886], + [2329, 4135, 8307, 3736, 1004, 9991], + [2348, 1451, 3522, 2883, 3047, 6650], + [4047, 8828, 8732, 5743, 1091, 381], + ] + ) + + """6d Hartmann test function input bounds: 0 <= xi <= 1, i = 1..6 global optimum: (0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573), min function value = -3.32237 """ - external_sum = 0 - for i in range(4): - internal_sum = 0 - for j in range(4): - internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 - external_sum = external_sum + alpha[i] * np.exp(-internal_sum) - - return external_sum[:, np.newaxis] - - def hartmann4_bounds(self, N, n): - x = np.random.uniform(0, 1, size=(N, 4)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(4)] - xtest = cartesian(arrays) - return (4, xtest, x, 0.5) - - def hartmann4_opt_bounds(self): - b = tuple([(0, 1) for i in range(4)]) - return b - - def hartmann3(self, X, sigma=0.1): - - X_lower = np.array([0, 0, 0]) - X_upper = np.array([1, 1, 1]) - # opt = np.array([[0.114614, 0.555649, 0.852547]]) - # fopt = np.array([[-3.86278]]) - alpha = [1.0, 1.2, 3.0, 3.2] - A = np.array([[3.0, 10.0, 30.0], - [0.1, 10.0, 35.0], - [3.0, 10.0, 30.0], - [0.1, 10.0, 35.0]]) - P = 0.0001 * np.array([[3689, 1170, 2673], - [4699, 4387, 7470], - [1090, 8732, 5547], - [381, 5743, 8828]]) - - external_sum = 0 - for i in range(4): 
- internal_sum = 0 - for j in range(3): - internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 - - external_sum = external_sum + alpha[i] * np.exp(-internal_sum) - - return external_sum[:, np.newaxis] - - def hartmann3_bounds(self, N, n): - x = np.random.uniform(0, 1, size=(N, 3)) - # grid - if n == None: - xtest = None - else: - arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(3)] - xtest = cartesian(arrays) - - return (3, xtest, x, 0.4) - - def hartmann3_opt_bounds(self): - b = tuple([(0, 1) for i in range(3)]) - return b - - def michal_old(self, X, sigma=0.1): - (n, d) = X.shape - sum_ = np.zeros(shape=(X.shape[0], 1)) - - for ii in range(d): - xi = X[:, ii] - # print ("xi",xi) - i = ii + 1 - new = np.sin(xi) * np.power((np.sin(i * np.power(xi, 2) / np.pi)), (2 * d)) - sum_ += new.reshape(n, 1) - return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def stang_old(self, X, sigma=0.1): - (n, d) = X.shape - sum_ = np.zeros(shape=(X.shape[0], 1)) - - for ii in range(d): - xi = X[:, ii] - new = xi ** 4 - 16. * xi ** 2 + 5 * xi - sum_ += new.reshape(n, 1) - - sum_ = sum_ / (38.7122 * d) - # sum_ = sum_/d - - return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def michal_un(self, X, sigma=0.1): - (n, d) = X.shape - X = (X + 0.5) * np.pi - ar = np.arange(1, d + 1, 1) - sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d)) - sum_ = np.sum(sum_, axis=1).reshape(-1, 1) - return sum_ + np.random.randn(X.shape[0], 1) * sigma - - def michal(self, X, sigma=0.1): - (n, d) = X.shape - X = (X + 0.5) * np.pi - ar = np.arange(1, d + 1, 1) - sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d)) - sum_ = np.sum(sum_, axis=1).reshape(-1, 1) - sum_ = sum_ / self.michal_optimum(d)[1] - return sum_ + np.random.randn(X.shape[0], 1) * sigma - - def michal_bounds(self, N, n, d=1, adv_inv=False): - if adv_inv == False: - x = np.random.uniform(-0.5, 0.5, size=(N, d)) - else: - x = np.random.uniform(-0.5, 0., size=(N, d)) - - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - - return (d, xtest, x, 0.3) - - def michal_opt_bounds(self, d): - b = tuple([(-0.5, 0.5) for i in range(d)]) - return b - - def michal_optimum(self, d): - q = 20 - opt = np.ones(shape=(q)) - opt[0] = 2.93254 - opt[1] = 2.34661 - opt[2] = 1.64107 - opt[3] = 1.24415 - opt[4] = 0.999643 - opt[5] = 0.834879 - opt[6] = 2.1089 - opt[7] = 1.84835 - opt[8] = 1.64448 - opt[9] = 1.48089 - opt[10] = 1.34678 - opt[11] = 1.2349 - opt[12] = 1.89701 - opt[13] = 1.76194 - opt[14] = 1.64477 - opt[15] = 1.54218 - opt[16] = 1.45162 - opt[17] = 1.37109 - opt[18] = 1.81774 - opt = opt[0:d].reshape(1, -1) - opt = (opt / np.pi) - 0.5 - value = self.michal_un(opt, sigma=0) - return (opt, value[0][0]) - - def stang_un(self, X, sigma=0.1): - (n, d) = X.shape - X = X * 8 - Y = X ** 2 - sum_ = np.sum(Y ** 2 - 16. * Y + 5 * X, axis=1).reshape(-1, 1) - sum_ = sum_ - return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def stang(self, X, sigma=0.1): - (n, d) = X.shape - X = X * 8 - Y = X ** 2 - sum_ = np.sum(Y ** 2 - 16. 
* Y + 5 * X, axis=1).reshape(-1, 1) - sum_ = sum_ / self.stang_optimum(d)[1] - return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def stang_bounds(self, N, n, d=1, adv_inv=False): - if adv_inv == False: - x = np.random.uniform(-0.5, 0.5, size=(N, d)) - else: - print("Adversarially initiallized") - x = np.random.uniform(0.4, 0.5, size=(N, d)) - - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - - return (d, xtest, x, 0.6) - - def stang_opt_bounds(self, d): - b = tuple([(-0.5, 0.5) for i in range(d)]) - return b - - def stang_optimum(self, d): - opt = np.ones(shape=(d)) * (-2.9035) - opt = opt / 8 - opt = opt.reshape(1, -1) - - value = self.stang_un(opt, sigma=0.0) - return (opt, value[0][0]) - - def double_group_un(self, X, sigma=0.1): - sum_ = np.sum(np.exp(-(np.diff(X, axis=1) / 0.25) ** 2), axis=1).reshape(-1, 1) - return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def double_group(self, X, sigma=0.1): - (n, d) = X.shape - sum_ = np.sum(np.exp(-(np.diff(X, axis=1) / 0.25) ** 2), axis=1).reshape(-1, 1) - sum_ = sum_ / self.double_group_optimum(d)[1] - return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma - - def double_group_bounds(self, N, n, d=1, adv_inv=False): - if adv_inv == False: - x = np.random.uniform(-0.5, 0.5, size=(N, d)) - else: - print("Adversarially initiallized") - x = np.random.uniform(-0.5, -0.4, size=(N, d)) - - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - - return (d, xtest, x, 0.6) - - def double_group_opt_bounds(self, d): - b = tuple([(-0.5, 0.5) for i in range(d)]) - return b - - def double_group_optimum(self, d): - opt = np.zeros(shape=(1, d)) - value = self.double_group_un(opt, 0)[0][0] - return (opt, value) - - def swissfel(self, X, sigma=0.1): - if self.init == False: - raise AssertionError("Need to run bounds first.") - else: - if sigma == 0.0: - return self.model.predict(X)[0] - else: - return self.model.predict(X)[0] + np.random.randn(X.shape[0], 1) * self.noise - - def swissfel_bounds(self, N, n): - if self.init == False: - import os.path - fname = "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p" - if not os.path.isfile(fname): - f = File('/home/mojko/Documents/PhD/RFFinBO/code/test_problems/evaluations.hdf5') - dset = f['1'] - X = dset["x"][:].reshape(-1, 5) - - # y response and scale - Y = dset["y"][:].reshape(-1, 1) - Y = Y / np.max(np.abs(Y)) - - # noise structure - Yerr = dset["y_std"] / np.max(np.abs(Y)) - self.noise = np.std(Yerr) - print("Estimated noise level", self.noise) - - # data scale to [-0.5,0.5] - X = dset["x"][:].reshape(-1, 5) - for j in range(5): - a = np.min(X[:, j]) - b = np.max(X[:, j]) - X[:, j] = (X[:, j] / (b - a)) - 0.5 - a / (b - a) - - ## fully additive kernel s - self.kernel = GPy.kern.RBF(1, active_dims=[0]) + GPy.kern.RBF(1, active_dims=[1]) \ - + GPy.kern.RBF(1, active_dims=[2]) + GPy.kern.RBF(1, active_dims=[3]) \ - + GPy.kern.RBF(1, active_dims=[4]) - self.model = GPy.models.GPRegression(X, Y, self.kernel) - print("Model fit") - self.model.optimize(messages=True) - print("ML likelihood fit") - self.init = True - # save pickle - pickle.dump(self.model, - open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", "wb")) - pickle.dump(self.noise, - open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", "wb")) - else: - self.init = True - self.model = 
pickle.load( - open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", "rb")) - self.noise = pickle.load( - open("/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", "rb")) - - if n == None: - xtest = None - else: - arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(5)] - xtest = cartesian(arrays) - - # bw = np.min(self.kernel.lengthscale) - x = np.random.uniform(-0.5, 0.5, size=(N, 5)) - return (5, xtest, x, 0.1) - - def swissfel_opt_bounds(self): - b = tuple([(-0.5, 0.5) for i in range(5)]) - return b - - def swissfel_optimum(self): - from scipy.optimize import minimize - # maximize the function - mybounds = self.swissfel_opt_bounds() - fun = lambda x: -self.swissfel(x.reshape(1, -1), sigma=0.0)[0][0] - - best = -10. - repeats = 10 - for i in range(repeats): - x0 = np.random.uniform(-0.5, 0.5, size=(5,)) - res = minimize(fun, x0, method="L-BFGS-B", tol=0.0001, bounds=mybounds) - value = self.swissfel(res.x.reshape(1, -1), sigma=0) - if value > best: - best = value - self.opt_loc = res.x.reshape(1, -1) - return (self.opt_loc, best) - - def neural_net(self, X, sigma=0.1): - (n, d) = X.shape - res = [] - val_size = 400 - if self.sampled == False: - self.sampled = True - try: - self.mnist = input_data.read_data_sets("~/.", one_hot=True, validation_size=val_size) - except: - self.mnist = input_data.read_data_sets("~/.", one_hot=True) - - for x in X: - (it, acc) = train_network(self.mnist, dropout=x[0], verbose=False, - val_size=val_size, maxiter=300, initialization_params=x[1:], no_filters_1=self.NN, - no_filters_2=self.NN2, val_count=30) - res.append(acc) - - return np.array(acc).reshape(n, 1) - - def neural_net_bounds(self, N, n, NN=16, NN2=22): - self.NN = NN - self.NN2 = NN2 - d = self.NN + self.NN2 - - x = np.random.uniform(0, 10, size=(N, d)) - dropout = np.random.uniform(0, 1, size=(N, 1)) - x = np.concatenate((x, dropout), axis=1) - - if n == None: - xtest = None - else: - arrays = [np.linspace(0, 1, n).reshape(n, 1)] + [np.linspace(0, 10, n).reshape(n, 1) for i in range(d)] - xtest = cartesian(arrays) - - return (d + 1, xtest, x, 0.9) - - def neural_net_opt_bounds(self): - d = self.NN + self.NN2 - b = tuple([(0, 1)] + [(0, 10) for i in range(d)]) - return b + external_sum = 0 + for i in range(4): + internal_sum = 0 + for j in range(4): + internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 + external_sum = external_sum + alpha[i] * np.exp(-internal_sum) + + return external_sum[:, np.newaxis] + + def hartmann4_bounds(self, N, n): + x = np.random.uniform(0, 1, size=(N, 4)) + # grid + if n == None: + xtest = None + else: + arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(4)] + xtest = cartesian(arrays) + return (4, xtest, x, 0.5) + + def hartmann4_opt_bounds(self): + b = tuple([(0, 1) for i in range(4)]) + return b + + def hartmann3(self, X, sigma=0.1): + + X_lower = np.array([0, 0, 0]) + X_upper = np.array([1, 1, 1]) + # opt = np.array([[0.114614, 0.555649, 0.852547]]) + # fopt = np.array([[-3.86278]]) + alpha = [1.0, 1.2, 3.0, 3.2] + A = np.array( + [[3.0, 10.0, 30.0], [0.1, 10.0, 35.0], [3.0, 10.0, 30.0], [0.1, 10.0, 35.0]] + ) + P = 0.0001 * np.array( + [ + [3689, 1170, 2673], + [4699, 4387, 7470], + [1090, 8732, 5547], + [381, 5743, 8828], + ] + ) + + external_sum = 0 + for i in range(4): + internal_sum = 0 + for j in range(3): + internal_sum = internal_sum + A[i, j] * (X[:, j] - P[i, j]) ** 2 + + external_sum = external_sum + alpha[i] * np.exp(-internal_sum) + + return external_sum[:, np.newaxis] + + 
def hartmann3_bounds(self, N, n): + x = np.random.uniform(0, 1, size=(N, 3)) + # grid + if n == None: + xtest = None + else: + arrays = [np.linspace(0, 1, n).reshape(n, 1) for i in range(3)] + xtest = cartesian(arrays) + + return (3, xtest, x, 0.4) + + def hartmann3_opt_bounds(self): + b = tuple([(0, 1) for i in range(3)]) + return b + + def michal_old(self, X, sigma=0.1): + (n, d) = X.shape + sum_ = np.zeros(shape=(X.shape[0], 1)) + + for ii in range(d): + xi = X[:, ii] + # print ("xi",xi) + i = ii + 1 + new = np.sin(xi) * np.power((np.sin(i * np.power(xi, 2) / np.pi)), (2 * d)) + sum_ += new.reshape(n, 1) + return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def stang_old(self, X, sigma=0.1): + (n, d) = X.shape + sum_ = np.zeros(shape=(X.shape[0], 1)) + + for ii in range(d): + xi = X[:, ii] + new = xi**4 - 16.0 * xi**2 + 5 * xi + sum_ += new.reshape(n, 1) + + sum_ = sum_ / (38.7122 * d) + # sum_ = sum_/d + + return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def michal_un(self, X, sigma=0.1): + (n, d) = X.shape + X = (X + 0.5) * np.pi + ar = np.arange(1, d + 1, 1) + sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d)) + sum_ = np.sum(sum_, axis=1).reshape(-1, 1) + return sum_ + np.random.randn(X.shape[0], 1) * sigma + + def michal(self, X, sigma=0.1): + (n, d) = X.shape + X = (X + 0.5) * np.pi + ar = np.arange(1, d + 1, 1) + sum_ = np.sin(X) * np.power((np.sin(ar * X / np.pi)), (2 * d)) + sum_ = np.sum(sum_, axis=1).reshape(-1, 1) + sum_ = sum_ / self.michal_optimum(d)[1] + return sum_ + np.random.randn(X.shape[0], 1) * sigma + + def michal_bounds(self, N, n, d=1, adv_inv=False): + if adv_inv == False: + x = np.random.uniform(-0.5, 0.5, size=(N, d)) + else: + x = np.random.uniform(-0.5, 0.0, size=(N, d)) + + if n == None: + xtest = None + else: + arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] + xtest = cartesian(arrays) + + return (d, xtest, x, 0.3) + + def michal_opt_bounds(self, d): + b = tuple([(-0.5, 0.5) for i in range(d)]) + return b + + def michal_optimum(self, d): + q = 20 + opt = np.ones(shape=(q)) + opt[0] = 2.93254 + opt[1] = 2.34661 + opt[2] = 1.64107 + opt[3] = 1.24415 + opt[4] = 0.999643 + opt[5] = 0.834879 + opt[6] = 2.1089 + opt[7] = 1.84835 + opt[8] = 1.64448 + opt[9] = 1.48089 + opt[10] = 1.34678 + opt[11] = 1.2349 + opt[12] = 1.89701 + opt[13] = 1.76194 + opt[14] = 1.64477 + opt[15] = 1.54218 + opt[16] = 1.45162 + opt[17] = 1.37109 + opt[18] = 1.81774 + opt = opt[0:d].reshape(1, -1) + opt = (opt / np.pi) - 0.5 + value = self.michal_un(opt, sigma=0) + return (opt, value[0][0]) + + def stang_un(self, X, sigma=0.1): + (n, d) = X.shape + X = X * 8 + Y = X**2 + sum_ = np.sum(Y**2 - 16.0 * Y + 5 * X, axis=1).reshape(-1, 1) + sum_ = sum_ + return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def stang(self, X, sigma=0.1): + (n, d) = X.shape + X = X * 8 + Y = X**2 + sum_ = np.sum(Y**2 - 16.0 * Y + 5 * X, axis=1).reshape(-1, 1) + sum_ = sum_ / self.stang_optimum(d)[1] + return -0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def stang_bounds(self, N, n, d=1, adv_inv=False): + if adv_inv == False: + x = np.random.uniform(-0.5, 0.5, size=(N, d)) + else: + print("Adversarially initiallized") + x = np.random.uniform(0.4, 0.5, size=(N, d)) + + if n == None: + xtest = None + else: + arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] + xtest = cartesian(arrays) + + return (d, xtest, x, 0.6) + + def stang_opt_bounds(self, d): + b = tuple([(-0.5, 0.5) for i in range(d)]) + return b + + def 
stang_optimum(self, d): + opt = np.ones(shape=(d)) * (-2.9035) + opt = opt / 8 + opt = opt.reshape(1, -1) + + value = self.stang_un(opt, sigma=0.0) + return (opt, value[0][0]) + + def double_group_un(self, X, sigma=0.1): + sum_ = np.sum(np.exp(-((np.diff(X, axis=1) / 0.25) ** 2)), axis=1).reshape( + -1, 1 + ) + return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def double_group(self, X, sigma=0.1): + (n, d) = X.shape + sum_ = np.sum(np.exp(-((np.diff(X, axis=1) / 0.25) ** 2)), axis=1).reshape( + -1, 1 + ) + sum_ = sum_ / self.double_group_optimum(d)[1] + return 0.5 * sum_ + np.random.randn(X.shape[0], 1) * sigma + + def double_group_bounds(self, N, n, d=1, adv_inv=False): + if adv_inv == False: + x = np.random.uniform(-0.5, 0.5, size=(N, d)) + else: + print("Adversarially initiallized") + x = np.random.uniform(-0.5, -0.4, size=(N, d)) + + if n == None: + xtest = None + else: + arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(d)] + xtest = cartesian(arrays) + + return (d, xtest, x, 0.6) + + def double_group_opt_bounds(self, d): + b = tuple([(-0.5, 0.5) for i in range(d)]) + return b + + def double_group_optimum(self, d): + opt = np.zeros(shape=(1, d)) + value = self.double_group_un(opt, 0)[0][0] + return (opt, value) + + def swissfel(self, X, sigma=0.1): + if self.init == False: + raise AssertionError("Need to run bounds first.") + else: + if sigma == 0.0: + return self.model.predict(X)[0] + else: + return ( + self.model.predict(X)[0] + + np.random.randn(X.shape[0], 1) * self.noise + ) + + def swissfel_bounds(self, N, n): + if self.init == False: + import os.path + + fname = ( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p" + ) + if not os.path.isfile(fname): + f = File( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/evaluations.hdf5" + ) + dset = f["1"] + X = dset["x"][:].reshape(-1, 5) + + # y response and scale + Y = dset["y"][:].reshape(-1, 1) + Y = Y / np.max(np.abs(Y)) + + # noise structure + Yerr = dset["y_std"] / np.max(np.abs(Y)) + self.noise = np.std(Yerr) + print("Estimated noise level", self.noise) + + # data scale to [-0.5,0.5] + X = dset["x"][:].reshape(-1, 5) + for j in range(5): + a = np.min(X[:, j]) + b = np.max(X[:, j]) + X[:, j] = (X[:, j] / (b - a)) - 0.5 - a / (b - a) + + ## fully additive kernel s + self.kernel = ( + GPy.kern.RBF(1, active_dims=[0]) + + GPy.kern.RBF(1, active_dims=[1]) + + GPy.kern.RBF(1, active_dims=[2]) + + GPy.kern.RBF(1, active_dims=[3]) + + GPy.kern.RBF(1, active_dims=[4]) + ) + self.model = GPy.models.GPRegression(X, Y, self.kernel) + print("Model fit") + self.model.optimize(messages=True) + print("ML likelihood fit") + self.init = True + # save pickle + pickle.dump( + self.model, + open( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", + "wb", + ), + ) + pickle.dump( + self.noise, + open( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", + "wb", + ), + ) + else: + self.init = True + self.model = pickle.load( + open( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_model.p", + "rb", + ) + ) + self.noise = pickle.load( + open( + "/home/mojko/Documents/PhD/RFFinBO/code/test_problems/swissfel_noise.p", + "rb", + ) + ) + + if n == None: + xtest = None + else: + arrays = [np.linspace(-0.5, 0.5, n).reshape(n, 1) for i in range(5)] + xtest = cartesian(arrays) + + # bw = np.min(self.kernel.lengthscale) + x = np.random.uniform(-0.5, 0.5, size=(N, 5)) + return (5, xtest, x, 0.1) + + def swissfel_opt_bounds(self): + b = 
tuple([(-0.5, 0.5) for i in range(5)]) + return b + + def swissfel_optimum(self): + from scipy.optimize import minimize + + # maximize the function + mybounds = self.swissfel_opt_bounds() + fun = lambda x: -self.swissfel(x.reshape(1, -1), sigma=0.0)[0][0] + + best = -10.0 + repeats = 10 + for i in range(repeats): + x0 = np.random.uniform(-0.5, 0.5, size=(5,)) + res = minimize(fun, x0, method="L-BFGS-B", tol=0.0001, bounds=mybounds) + value = self.swissfel(res.x.reshape(1, -1), sigma=0) + if value > best: + best = value + self.opt_loc = res.x.reshape(1, -1) + return (self.opt_loc, best) + + def neural_net(self, X, sigma=0.1): + (n, d) = X.shape + res = [] + val_size = 400 + if self.sampled == False: + self.sampled = True + try: + self.mnist = input_data.read_data_sets( + "~/.", one_hot=True, validation_size=val_size + ) + except: + self.mnist = input_data.read_data_sets("~/.", one_hot=True) + + for x in X: + (it, acc) = train_network( + self.mnist, + dropout=x[0], + verbose=False, + val_size=val_size, + maxiter=300, + initialization_params=x[1:], + no_filters_1=self.NN, + no_filters_2=self.NN2, + val_count=30, + ) + res.append(acc) + + return np.array(acc).reshape(n, 1) + + def neural_net_bounds(self, N, n, NN=16, NN2=22): + self.NN = NN + self.NN2 = NN2 + d = self.NN + self.NN2 + + x = np.random.uniform(0, 10, size=(N, d)) + dropout = np.random.uniform(0, 1, size=(N, 1)) + x = np.concatenate((x, dropout), axis=1) + + if n == None: + xtest = None + else: + arrays = [np.linspace(0, 1, n).reshape(n, 1)] + [ + np.linspace(0, 10, n).reshape(n, 1) for i in range(d) + ] + xtest = cartesian(arrays) + + return (d + 1, xtest, x, 0.9) + + def neural_net_opt_bounds(self): + d = self.NN + self.NN2 + b = tuple([(0, 1)] + [(0, 10) for i in range(d)]) + return b if __name__ == "__main__": - s = 0 - TT = test_function() - Fs = [lambda x: TT.f(x, sigma=s), lambda x: TT.branin(x, sigma=s), lambda x: TT.camelback(x, sigma=s), - lambda x: TT.hartmann3(x, sigma=s), lambda x: TT.hartmann4(x, sigma=s), lambda x: TT.hartmann6(x, sigma=s)] - Fbounds = [lambda n: TT.f_bounds(1, n), lambda n: TT.branin_bounds(1, n), lambda n: TT.camelback_bounds(1, n), - lambda n: TT.hartmann3_bounds(1, n), lambda n: TT.hartmann4_bounds(1, n), - lambda n: TT.hartmann6_bounds(1, n)] - ns = [4000, 200, 200, 100, 50, 10] - tests = ["1D", "Branin", "Camelback", "Hartmann3", "Hartmann4", "Hartmann6"] - z = [] - for i in range(6): - (d, xtest, x, _) = Fbounds[i](ns[i]) - z.append(np.max(Fs[i](xtest))) - print(tests[i], np.max(Fs[i](xtest))) - print(z) - - for d, n in zip([1, 2, 3, 4], [900, 100, 50, 3]): - G = lambda x: TT.stang(x, sigma=s) - (q, xtest, x, _) = TT.stang_bounds(1, n, d=d) - print(d, np.max(G(xtest)), np.max(G(xtest)) / d) - - # G = lambda x: TT.michal(x, sigma = s) - # (d,xtest,x,_) = TT.michal_bounds(1,5, d = 10) - # print (d, np.max(G(xtest)), np.max(G(xtest))/d) - - # for d in np.arange(1,31,1): - # TT.optimize_f(d = d) - - print("==== Optimized vs Non-Optimized ==== ") - print("Michal") - multistart = 400 - d = 10 - G1 = lambda x: TT.michal(x, sigma=0.) 
- fun = lambda x: -TT.michal(x.reshape(-1, 1), sigma=0.)[0][0] - (d, xtest, x, _) = TT.michal_bounds(20, None, d=d) - mybounds = TT.michal_opt_bounds(d=d) - - from scipy.optimize import minimize - - results = [] - for i in range(multistart): - x0 = np.random.randn(d) - for i in range(d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds) - # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds) - solution = res.x - results.append([solution, -fun(solution)]) - results = np.array(results) - print(np.max(results[:, 1])) - - print("Stybtang") - for d in [10, 20]: - multistart = 400 - G1 = lambda x: TT.stang(x, sigma=0.) - fun = lambda x: -TT.stang(x.reshape(-1, 1), sigma=0.)[0][0] - (d, xtest, x, _) = TT.stang_bounds(20, None, d=d) - mybounds = TT.stang_opt_bounds(d=d) - from scipy.optimize import minimize - - results = [] - for i in range(multistart): - x0 = np.random.randn(d) - for i in range(d): - x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) - res = minimize(fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds) - # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds) - solution = res.x - results.append([solution, -fun(solution)]) - - results = np.array(results) - print(d, np.max(results[:, 1])) + s = 0 + TT = test_function() + Fs = [ + lambda x: TT.f(x, sigma=s), + lambda x: TT.branin(x, sigma=s), + lambda x: TT.camelback(x, sigma=s), + lambda x: TT.hartmann3(x, sigma=s), + lambda x: TT.hartmann4(x, sigma=s), + lambda x: TT.hartmann6(x, sigma=s), + ] + Fbounds = [ + lambda n: TT.f_bounds(1, n), + lambda n: TT.branin_bounds(1, n), + lambda n: TT.camelback_bounds(1, n), + lambda n: TT.hartmann3_bounds(1, n), + lambda n: TT.hartmann4_bounds(1, n), + lambda n: TT.hartmann6_bounds(1, n), + ] + ns = [4000, 200, 200, 100, 50, 10] + tests = ["1D", "Branin", "Camelback", "Hartmann3", "Hartmann4", "Hartmann6"] + z = [] + for i in range(6): + (d, xtest, x, _) = Fbounds[i](ns[i]) + z.append(np.max(Fs[i](xtest))) + print(tests[i], np.max(Fs[i](xtest))) + print(z) + + for d, n in zip([1, 2, 3, 4], [900, 100, 50, 3]): + G = lambda x: TT.stang(x, sigma=s) + (q, xtest, x, _) = TT.stang_bounds(1, n, d=d) + print(d, np.max(G(xtest)), np.max(G(xtest)) / d) + + # G = lambda x: TT.michal(x, sigma = s) + # (d,xtest,x,_) = TT.michal_bounds(1,5, d = 10) + # print (d, np.max(G(xtest)), np.max(G(xtest))/d) + + # for d in np.arange(1,31,1): + # TT.optimize_f(d = d) + + print("==== Optimized vs Non-Optimized ==== ") + print("Michal") + multistart = 400 + d = 10 + G1 = lambda x: TT.michal(x, sigma=0.0) + fun = lambda x: -TT.michal(x.reshape(-1, 1), sigma=0.0)[0][0] + (d, xtest, x, _) = TT.michal_bounds(20, None, d=d) + mybounds = TT.michal_opt_bounds(d=d) + + from scipy.optimize import minimize + + results = [] + for i in range(multistart): + x0 = np.random.randn(d) + for i in range(d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds + ) + # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds) + solution = res.x + results.append([solution, -fun(solution)]) + results = np.array(results) + print(np.max(results[:, 1])) + + print("Stybtang") + for d in [10, 20]: + multistart = 400 + G1 = lambda x: TT.stang(x, sigma=0.0) + fun = lambda x: -TT.stang(x.reshape(-1, 1), sigma=0.0)[0][0] + (d, xtest, x, _) = 
TT.stang_bounds(20, None, d=d) + mybounds = TT.stang_opt_bounds(d=d) + from scipy.optimize import minimize + + results = [] + for i in range(multistart): + x0 = np.random.randn(d) + for i in range(d): + x0[i] = np.random.uniform(mybounds[i][0], mybounds[i][1]) + res = minimize( + fun, x0, method="L-BFGS-B", jac=None, tol=0.00001, bounds=mybounds + ) + # res = minimize(fun, x0, method = "SLSQP", jac = None, tol = 0.00001, bounds=mybounds) + solution = res.x + results.append([solution, -fun(solution)]) + + results = np.array(results) + print(d, np.max(results[:, 1])) # print (G1(x)) # print (G2(x)) diff --git a/tests/SRI_test.py b/tests/SRI_test.py index 2598ca0..a1bf5d0 100755 --- a/tests/SRI_test.py +++ b/tests/SRI_test.py @@ -4,89 +4,93 @@ def get_angle(R): - v = torch.Tensor([1.0,1.0]).double() - a1 = np.arccos((torch.dot(v,R@v)/torch.dot(v,v)).numpy()) - a2 = np.arccos(-(torch.dot(v,R@v)/torch.dot(v,v)).numpy()) - return np.min([a1,a2]) + v = torch.Tensor([1.0, 1.0]).double() + a1 = np.arccos((torch.dot(v, R @ v) / torch.dot(v, v)).numpy()) + a2 = np.arccos(-(torch.dot(v, R @ v) / torch.dot(v, v)).numpy()) + return np.min([a1, a2]) if __name__ == "__main__": - from stpy.embeddings.embedding import HermiteEmbedding - N = 1 - s = 0.0001 - n = 20 - L_infinity_ball = 0.5 - - d = 2 - - thetae = np.radians(35.) - ce, se = np.cos(thetae), np.sin(thetae) - R = torch.from_numpy(np.array(((ce, -se), (se, ce)))) - - BenchmarkFunc = MichalBenchmark(d = d, R = R) - - x = BenchmarkFunc.initial_guess(N) - xtest = BenchmarkFunc.interval(n) - gamma = BenchmarkFunc.bandwidth() - bounds = BenchmarkFunc.bounds() - BenchmarkFunc.scale_max(xtest=xtest) - - print ("Gamma:",gamma) - - F = lambda x: BenchmarkFunc.eval(x, sigma=s) - F0 = lambda x: BenchmarkFunc.eval(x, sigma=0) - - - rot_out = open("rotOut.txt",'w') - - - m = 64 - GP = GaussianProcessFF(d=d, s=s, m = torch.ones(d)*m, gamma=gamma*torch.ones(d), bounds=bounds, groups = stpy.helpers.helper.full_group(d)) - #GP = GaussianProcess(d =d ,s = s, gamma = gamma*torch.ones(d) ,groups = stpy.helper.full_group(d)) - #GP = GaussianProcess(d=d, s=s, gamma=gamma, groups=None) - - m = 512 - embedding = HermiteEmbedding(gamma=gamma, m=m, d=d, diameter=1, approx = "hermite") - Map = lambda x: embedding.embed(x) - - - - x0 = torch.Tensor([0., 0.]).double().view(-1, d) -# Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=False, s = 10e-8) - Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=True, s = s, GPMap = True) - - Rep = 2 - Bandit.decolerate(x0,10e-9,Rep) - - print ("True:",thetae) - print (R) - print("Angle:",get_angle(R)) - - rot_out.write(str(get_angle(R))+"\n") - - print ("E design:\n",Bandit.Q) - print("Angle:",get_angle(Bandit.Q.detach())) - rot_out.write(str(get_angle(Bandit.Q.detach()))+"\n") - - # Gaussian Design - #Design = torch.randn(size = (Nd,d),dtype = torch.float64)*0.1 - - Design = Bandit.design - y = Bandit.value_design - for repeats in range(5): - B = Bandit.inverse_sliced_regression(Design,y,slices = Rep) - print ("Recovered from SRI:\n",B) - print (get_angle(B)) - rot_out.write(str(get_angle(B)) + " ") - - rot_out.write("\n") - BB = Bandit.bootstrap_inverse_sliced_regression(Design,y,slices = Rep,repeats = 20) - print ("Bootstrap",BB) - rot_out.write(str(get_angle(torch.from_numpy(BB)))+"\n") - - for _ in range(5): - Bandit.GP2.optimize_params(type="rots", restarts=1) - print (Bandit.GP2.Rot) - rot_out.write(str(get_angle(Bandit.GP2.Rot))+" ") - rot_out.write("\n") - rot_out.close() + from stpy.embeddings.embedding import HermiteEmbedding + + N 
= 1 + s = 0.0001 + n = 20 + L_infinity_ball = 0.5 + + d = 2 + + thetae = np.radians(35.0) + ce, se = np.cos(thetae), np.sin(thetae) + R = torch.from_numpy(np.array(((ce, -se), (se, ce)))) + + BenchmarkFunc = MichalBenchmark(d=d, R=R) + + x = BenchmarkFunc.initial_guess(N) + xtest = BenchmarkFunc.interval(n) + gamma = BenchmarkFunc.bandwidth() + bounds = BenchmarkFunc.bounds() + BenchmarkFunc.scale_max(xtest=xtest) + + print("Gamma:", gamma) + + F = lambda x: BenchmarkFunc.eval(x, sigma=s) + F0 = lambda x: BenchmarkFunc.eval(x, sigma=0) + + rot_out = open("rotOut.txt", "w") + + m = 64 + GP = GaussianProcessFF( + d=d, + s=s, + m=torch.ones(d) * m, + gamma=gamma * torch.ones(d), + bounds=bounds, + groups=stpy.helpers.helper.full_group(d), + ) + # GP = GaussianProcess(d =d ,s = s, gamma = gamma*torch.ones(d) ,groups = stpy.helper.full_group(d)) + # GP = GaussianProcess(d=d, s=s, gamma=gamma, groups=None) + + m = 512 + embedding = HermiteEmbedding(gamma=gamma, m=m, d=d, diameter=1, approx="hermite") + Map = lambda x: embedding.embed(x) + + x0 = torch.Tensor([0.0, 0.0]).double().view(-1, d) + # Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=False, s = 10e-8) + Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=True, s=s, GPMap=True) + + Rep = 2 + Bandit.decolerate(x0, 10e-9, Rep) + + print("True:", thetae) + print(R) + print("Angle:", get_angle(R)) + + rot_out.write(str(get_angle(R)) + "\n") + + print("E design:\n", Bandit.Q) + print("Angle:", get_angle(Bandit.Q.detach())) + rot_out.write(str(get_angle(Bandit.Q.detach())) + "\n") + + # Gaussian Design + # Design = torch.randn(size = (Nd,d),dtype = torch.float64)*0.1 + + Design = Bandit.design + y = Bandit.value_design + for repeats in range(5): + B = Bandit.inverse_sliced_regression(Design, y, slices=Rep) + print("Recovered from SRI:\n", B) + print(get_angle(B)) + rot_out.write(str(get_angle(B)) + " ") + + rot_out.write("\n") + BB = Bandit.bootstrap_inverse_sliced_regression(Design, y, slices=Rep, repeats=20) + print("Bootstrap", BB) + rot_out.write(str(get_angle(torch.from_numpy(BB))) + "\n") + + for _ in range(5): + Bandit.GP2.optimize_params(type="rots", restarts=1) + print(Bandit.GP2.Rot) + rot_out.write(str(get_angle(Bandit.GP2.Rot)) + " ") + rot_out.write("\n") + rot_out.close() diff --git a/tests/clenshaw_curtis_test.py b/tests/clenshaw_curtis_test.py index b3f96d1..c684070 100644 --- a/tests/clenshaw_curtis_test.py +++ b/tests/clenshaw_curtis_test.py @@ -3,35 +3,40 @@ if __name__ == "__main__": - ### Generate data - a sample from a Gaussian process - n = 1024 - N = 5 - gamma = 0.09 - #gamma = 1. - s = 0.2 - # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) - benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) - for j in range(10): - m = (2*(j+1)) ** 2 - #m = 64 - x = benchmark.initial_guess(N, adv_inv=False) - y = benchmark.eval(x) - xtest = benchmark.interval(1024) + ### Generate data - a sample from a Gaussian process + n = 1024 + N = 5 + gamma = 0.09 + # gamma = 1. 
+ s = 0.2 + # benchmark = stpy.test_functions.benchmarks.GaussianProcessSample(d =1, gamma = gamma, sigma = s, n = n) + benchmark = stpy.test_functions.benchmarks.Simple1DFunction(d=1, sigma=s) + for j in range(10): + m = (2 * (j + 1)) ** 2 + # m = 64 + x = benchmark.initial_guess(N, adv_inv=False) + y = benchmark.eval(x) + xtest = benchmark.interval(1024) - #print (x) - CFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="ccff", m=m, s=s) - QFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="hermite", m=m, s=s) - TFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF(gamma=gamma, approx="trapezoidal", m=m, s=s) + # print (x) + CFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF( + gamma=gamma, approx="ccff", m=m, s=s + ) + QFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF( + gamma=gamma, approx="hermite", m=m, s=s + ) + TFF = stpy.continuous_processes.fourier_fea.GaussianProcessFF( + gamma=gamma, approx="trapezoidal", m=m, s=s + ) - K1 = TFF.embed(x)@TFF.embed(x).T - K2 = QFF.embed(x) @ QFF.embed(x).T - K3 = CFF.embed(x) @ CFF.embed(x).T - # print(K2) - # print("----------------") - #print(K3) - # print("----------------") - print(m, torch.norm(K1 - K2), torch.norm(K2 -K3)) - - #CFF.fit_gp(x,y) - #CFF.visualize(xtest) + K1 = TFF.embed(x) @ TFF.embed(x).T + K2 = QFF.embed(x) @ QFF.embed(x).T + K3 = CFF.embed(x) @ CFF.embed(x).T + # print(K2) + # print("----------------") + # print(K3) + # print("----------------") + print(m, torch.norm(K1 - K2), torch.norm(K2 - K3)) + # CFF.fit_gp(x,y) + # CFF.visualize(xtest) diff --git a/tests/constrained_mean.py b/tests/constrained_mean.py index 1bec7ee..7491a48 100644 --- a/tests/constrained_mean.py +++ b/tests/constrained_mean.py @@ -5,19 +5,19 @@ import matplotlib.pyplot as plt if __name__ == "__main__": - d = 1 - p = 4 - embed_p = ChebyschevEmbedding(d=d, p=p) - m = embed_p.size - GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) + d = 1 + p = 4 + embed_p = ChebyschevEmbedding(d=d, p=p) + m = embed_p.size + GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) - x = torch.from_numpy(interval(10,d)) - xtest = torch.from_numpy(interval(1024, d)) - GP.fit_gp(x, x**8) + x = torch.from_numpy(interval(10, d)) + xtest = torch.from_numpy(interval(1024, d)) + GP.fit_gp(x, x**8) - mu = GP.mean_constrained(xtest, B = 0.5) + mu = GP.mean_constrained(xtest, B=0.5) - GP.visualize(xtest, show = False) - #plt.plot(x, x**8,'o') - plt.plot(xtest,mu) - plt.show() \ No newline at end of file + GP.visualize(xtest, show=False) + # plt.plot(x, x**8,'o') + plt.plot(xtest, mu) + plt.show() diff --git a/tests/continous_processes/psd_minimization/eigenvector_constraint.py b/tests/continous_processes/psd_minimization/eigenvector_constraint.py index 5cadc87..cad3c1f 100644 --- a/tests/continous_processes/psd_minimization/eigenvector_constraint.py +++ b/tests/continous_processes/psd_minimization/eigenvector_constraint.py @@ -4,7 +4,12 @@ import torch -from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding +from stpy.embeddings.embedding import ( + HermiteEmbedding, + RFFEmbedding, + ConcatEmbedding, + MaskedEmbedding, +) from stpy.kernels import KernelFunction from stpy.helpers.helper import interval, interval_torch from stpy.probability.gaussian_likelihood import GaussianLikelihood @@ -21,12 +26,14 @@ m = 32 def stable_rank(A): - return np.trace(A)/np.max(np.linalg.eigh(A)[0]) - + return np.trace(A) / np.max(np.linalg.eigh(A)[0]) V = 
torch.linalg.qr(torch.randn(size=(m, m)).double())[0] - f = lambda x: 0.5*torch.sin(x * 20) * (x > 0).double() + 0.5*torch.sin(x * 30) * (x > 0).double() + f = ( + lambda x: 0.5 * torch.sin(x * 20) * (x > 0).double() + + 0.5 * torch.sin(x * 30) * (x > 0).double() + ) Xtrain = interval_torch(n=N, d=1) ytrain = f(Xtrain) @@ -45,8 +52,8 @@ def stable_rank(A): A1 = cp.Variable((m // 2, m // 2), PSD=True) A2 = cp.Variable((m // 2, m // 2), PSD=True) A3 = cp.Variable((m // 2, m // 2)) - l = cp.Variable((1,1)) - s = cp.Parameter((1, 1), nonneg = True) + l = cp.Variable((1, 1)) + s = cp.Parameter((1, 1), nonneg=True) likelihood = GaussianLikelihood(sigma=s) estimator = RegularizedDictionary(embedding, likelihood) @@ -55,43 +62,61 @@ def stable_rank(A): likelihood = estimator.likelihood likelihood.load_data(data) - total_trace = 2. + total_trace = 2.0 objective = likelihood.get_objective_cvxpy()(theta) A = cp.bmat([[A1, A3], [A3, A2]]) - s.value = np.array([[1.]]) - constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace*l, A >> 0,cp.lambda_max(A)<=l] + s.value = np.array([[1.0]]) + constraints = [ + cp.matrix_frac(theta, A) <= 1, + cp.trace(A) <= total_trace * l, + A >> 0, + cp.lambda_max(A) <= l, + ] prob = cp.Problem(cp.Minimize(objective), constraints) prob.solve(solver=cp.MOSEK, verbose=True) estimator.theta_fit = theta.value estimator.fitted = True - print (prob.value) - print (np.max(np.linalg.eigh(A.value)[0])) - print (l.value) + print(prob.value) + print(np.max(np.linalg.eigh(A.value)[0])) + print(l.value) print("--------------") if theta.value is not None: mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'b', lw = 3, label = 'opt') - - plt.plot(Xtrain,ytrain,'ko', lw = 3) - plt.plot(xtest,f(xtest),'k--', lw = 3) - - constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace*l, A >> 0,cp.lambda_max(A)<=l, l<=s] + plt.plot(xtest, mu, "b", lw=3, label="opt") + + plt.plot(Xtrain, ytrain, "ko", lw=3) + plt.plot(xtest, f(xtest), "k--", lw=3) + + constraints = [ + cp.matrix_frac(theta, A) <= 1, + cp.trace(A) <= total_trace * l, + A >> 0, + cp.lambda_max(A) <= l, + l <= s, + ] prob = cp.Problem(cp.Minimize(objective), constraints) prob.solve(solver=cp.MOSEK, verbose=True) def cost(z): s.value = z prob.solve() - return prob.value, total_trace * l.value, l.value, (np.max(np.linalg.eigh(A.value)[0])), np.trace(A.value), stable_rank(A.value) - - z_vals = np.logspace(-5,5,20, base = 2) + return ( + prob.value, + total_trace * l.value, + l.value, + (np.max(np.linalg.eigh(A.value)[0])), + np.trace(A.value), + stable_rank(A.value), + ) + + z_vals = np.logspace(-5, 5, 20, base=2) l_vals = [] eigvals = [] differences = [] for z in z_vals: - prob_val, _, l_val, eigv, _ , _ = cost(np.array([[z]])) + prob_val, _, l_val, eigv, _, _ = cost(np.array([[z]])) estimator.theta_fit = theta.value estimator.fitted = True mu = estimator.mean(xtest) @@ -99,19 +124,18 @@ def cost(z): eigvals.append(float(eigv)) differences.append(float(l_val) - float(eigv)) - print (z, float(l_val) - float(eigv)) + print(z, float(l_val) - float(eigv)) - if float(l_val) - float(eigv) <= 1e-2 and float(l_val) - float(eigv)>=0: - plt.plot(xtest,mu, 'g--', lw = 3, label = 'stable-rank') + if float(l_val) - float(eigv) <= 1e-2 and float(l_val) - float(eigv) >= 0: + plt.plot(xtest, mu, "g--", lw=3, label="stable-rank") plt.show() - plt.plot(z_vals.reshape(-1),l_vals, label = 'lvals') - plt.plot(z_vals.reshape(-1),eigvals, label = 'eig') + plt.plot(z_vals.reshape(-1), l_vals, label="lvals") + 
plt.plot(z_vals.reshape(-1), eigvals, label="eig") # plt.plot(z_vals.reshape(-1), differences, label='diff') plt.legend() plt.show() - # # # Fix an eigenvector # v_init = np.zeros(shape=(m, 1)) @@ -173,4 +197,4 @@ def cost(z): # # grad = euclidean_gradient(w) # # w = w - eta * grad # # w = proj(w) - # # print (i, value(w)) \ No newline at end of file + # # print (i, value(w)) diff --git a/tests/continous_processes/psd_minimization/psd_minimization.py b/tests/continous_processes/psd_minimization/psd_minimization.py index 80c9e70..686ccdf 100644 --- a/tests/continous_processes/psd_minimization/psd_minimization.py +++ b/tests/continous_processes/psd_minimization/psd_minimization.py @@ -1,11 +1,21 @@ -from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding +from stpy.embeddings.embedding import ( + HermiteEmbedding, + RFFEmbedding, + ConcatEmbedding, + MaskedEmbedding, +) import pymanopt import cvxpy as cp import numpy as np import torch from cvxpylayers.torch import CvxpyLayer -from stpy.embeddings.embedding import HermiteEmbedding, RFFEmbedding, ConcatEmbedding, MaskedEmbedding +from stpy.embeddings.embedding import ( + HermiteEmbedding, + RFFEmbedding, + ConcatEmbedding, + MaskedEmbedding, +) from stpy.kernels import KernelFunction from stpy.helpers.helper import interval, interval_torch from stpy.probability.gaussian_likelihood import GaussianLikelihood @@ -15,7 +25,6 @@ if __name__ == "__main__": - N = 10 n = 256 d = 1 @@ -32,71 +41,71 @@ xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1)) kernel_object = KernelFunction(gamma=0.05, d=1) - #embedding = HermiteEmbedding(m=m, gamma = 1.) + # embedding = HermiteEmbedding(m=m, gamma = 1.) - embedding1 = NystromFeatures(kernel_object=kernel_object, m=m//2) + embedding1 = NystromFeatures(kernel_object=kernel_object, m=m // 2) embedding1.fit_gp(xtest / 2 - 0.5, None) - embedding2 = NystromFeatures(kernel_object=kernel_object, m=m//2) + embedding2 = NystromFeatures(kernel_object=kernel_object, m=m // 2) embedding2.fit_gp(xtest / 2 + 0.5, None) embedding = ConcatEmbedding([embedding1, embedding2]) - - theta = cp.Variable((m,1)) - A1 = cp.Variable((m//2,m//2), PSD = True) - A2 = cp.Variable((m//2, m//2), PSD=True) - A3 = cp.Variable((m//2, m//2)) + theta = cp.Variable((m, 1)) + A1 = cp.Variable((m // 2, m // 2), PSD=True) + A2 = cp.Variable((m // 2, m // 2), PSD=True) + A3 = cp.Variable((m // 2, m // 2)) t = cp.Variable() likelihood = GaussianLikelihood(sigma=s) estimator = RegularizedDictionary(embedding, likelihood) - data = (embedding.embed(Xtrain),ytrain) + data = (embedding.embed(Xtrain), ytrain) estimator.load_data(data) likelihood = estimator.likelihood likelihood.load_data(data) - total_trace = 5. 
+ total_trace = 5.0 objective = likelihood.get_objective_cvxpy()(theta) - A = cp.bmat([[A1,A3],[A3,A2]]) - constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace, A >> 0] + A = cp.bmat([[A1, A3], [A3, A2]]) + constraints = [cp.matrix_frac(theta, A) <= 1, cp.trace(A) <= total_trace, A >> 0] prob = cp.Problem(cp.Minimize(objective), constraints) - prob.solve(solver = cp.MOSEK, verbose = True) + prob.solve(solver=cp.MOSEK, verbose=True) estimator.theta_fit = theta.value estimator.fitted = True - print (prob.value) - #plt.imshow(A.value) - #plt.show() + print(prob.value) + # plt.imshow(A.value) + # plt.show() if theta.value is not None: mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'b', lw = 3, label = 'opt') - - plt.plot(Xtrain,ytrain,'ko', lw = 3) - plt.plot(xtest,f(xtest),'k--', lw = 3) + plt.plot(xtest, mu, "b", lw=3, label="opt") + plt.plot(Xtrain, ytrain, "ko", lw=3) + plt.plot(xtest, f(xtest), "k--", lw=3) - theta = cp.Variable((m,1)) + theta = cp.Variable((m, 1)) V = cp.Parameter((m, m)) objective = likelihood.get_objective_cvxpy()(theta) a = cp.Variable(m) - A = cp.Variable((m,m)) + A = cp.Variable((m, m)) - constraints = [cp.matrix_frac(V.T@theta, cp.diag(a)) <= 1., a>=0, cp.sum(a)<=total_trace] + constraints = [ + cp.matrix_frac(V.T @ theta, cp.diag(a)) <= 1.0, + a >= 0, + cp.sum(a) <= total_trace, + ] prob = cp.Problem(cp.Minimize(objective), constraints) - - - manifold = pymanopt.manifolds.Stiefel(m,m) + manifold = pymanopt.manifolds.Stiefel(m, m) def opt(V_val): V.value = V_val - prob.solve(solver = cp.MOSEK, verbose = False) + prob.solve(solver=cp.MOSEK, verbose=False) return theta.value @pymanopt.function.numpy(manifold) def cost(V_val): V.value = V_val - prob.solve(requires_grad=True, solver = cp.SCS) + prob.solve(requires_grad=True, solver=cp.SCS) return prob.value @pymanopt.function.numpy(manifold) @@ -106,56 +115,53 @@ def euclidean_gradient(V_val): prob.backward() return V.gradient - print ("INITIAL COST:", cost(np.eye(m))) + print("INITIAL COST:", cost(np.eye(m))) problem = pymanopt.Problem(manifold, cost, euclidean_gradient=euclidean_gradient) optimizer = pymanopt.optimizers.SteepestDescent(min_step_size=1e-15) - result = optimizer.run(problem, initial_point = np.eye(m)) + result = optimizer.run(problem, initial_point=np.eye(m)) V_val = result.point - #V_val = np.eye(m) - #print (result) - print (V_val@V_val.T) - print ("END COST:", cost(V_val)) + # V_val = np.eye(m) + # print (result) + print(V_val @ V_val.T) + print("END COST:", cost(V_val)) estimator.theta_fit = opt(V_val) estimator.fitted = True mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'r--', lw = 3, label = 'ortho opt') - - - - - - + plt.plot(xtest, mu, "r--", lw=3, label="ortho opt") estimator.theta_fit = opt(np.eye(m)) mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'g--', lw = 3, label = 'A identity') - + plt.plot(xtest, mu, "g--", lw=3, label="A identity") # simplified objective - theta = cp.Variable((m,1)) + theta = cp.Variable((m, 1)) objective = likelihood.get_objective_cvxpy()(theta) - constraints = [cp.sum_squares(theta) <= total_trace/m] + constraints = [cp.sum_squares(theta) <= total_trace / m] prob_simple = cp.Problem(cp.Minimize(objective), constraints) prob_simple.solve() - print ("SIMPLE COST:",prob_simple.value) + print("SIMPLE COST:", prob_simple.value) estimator.theta_fit = theta.value mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'tab:purple', lw = 3, label = 'simple solution') + plt.plot(xtest, mu, "tab:purple", lw=3, label="simple solution") - theta = 
cp.Variable((m,1)) + theta = cp.Variable((m, 1)) V = cp.Parameter((m, m)) objective = likelihood.get_objective_cvxpy()(theta) a = cp.Variable(m) - A = cp.Variable((m,m), PSD=True) - constraints = [cp.matrix_frac(theta, cp.diag(a)) <= 1., a>=0, cp.sum(a)<=total_trace] + A = cp.Variable((m, m), PSD=True) + constraints = [ + cp.matrix_frac(theta, cp.diag(a)) <= 1.0, + a >= 0, + cp.sum(a) <= total_trace, + ] prob_complicated = cp.Problem(cp.Minimize(objective), constraints) - prob_complicated.solve(solver = cp.MOSEK , verbose = True) + prob_complicated.solve(solver=cp.MOSEK, verbose=True) estimator.theta_fit = theta.value mu = estimator.mean(xtest) - plt.plot(xtest,mu, 'tab:brown', lw = 3, label = 'soln') + plt.plot(xtest, mu, "tab:brown", lw=3, label="soln") plt.legend() - plt.show() \ No newline at end of file + plt.show() diff --git a/tests/continous_processes/test_estimators/domain_non_stationarity.py b/tests/continous_processes/test_estimators/domain_non_stationarity.py index 035d8be..1e09e57 100644 --- a/tests/continous_processes/test_estimators/domain_non_stationarity.py +++ b/tests/continous_processes/test_estimators/domain_non_stationarity.py @@ -13,19 +13,23 @@ m = 200 d = 1 sigma = 0.005 -lam = 1. +lam = 1.0 n = 256 I = torch.eye(m).double() -budget = m*1 -kernel_object = KernelFunction(gamma = 0.1, d = 1) +budget = m * 1 +kernel_object = KernelFunction(gamma=0.1, d=1) -embedding1 = TriangleEmbedding(m = m, d = 1, kernel_object=kernel_object, interval=[-1,0], offset=0.0) -embedding2 = TriangleEmbedding(m = m, d = 1, kernel_object=kernel_object, interval=[0,1], offset=0.0) +embedding1 = TriangleEmbedding( + m=m, d=1, kernel_object=kernel_object, interval=[-1, 0], offset=0.0 +) +embedding2 = TriangleEmbedding( + m=m, d=1, kernel_object=kernel_object, interval=[0, 1], offset=0.0 +) -embedding = ConcatEmbedding([embedding1,embedding2]) +embedding = ConcatEmbedding([embedding1, embedding2]) -likelihood_base = GaussianLikelihood(sigma = sigma) +likelihood_base = GaussianLikelihood(sigma=sigma) # for w,g in zip(weights,new_groups): @@ -38,12 +42,14 @@ N = 20 torch.manual_seed(2) + def zeroing(X): Y = X.clone() - Y[ X < 0.] = 0. + Y[X < 0.0] = 0.0 return Y -F = lambda X: (np.cos(X*10.)+np.sin(X*10.))*zeroing(X) + +F = lambda X: (np.cos(X * 10.0) + np.sin(X * 10.0)) * zeroing(X) # X = torch.rand(size = (N,d)).double()*0.25+0.5 # y = F(X) # @@ -58,73 +64,99 @@ def zeroing(X): # F = lambda X: estimator.mean(X) -Xtrain = torch.rand(size=(10, d)).double()/2 +Xtrain = torch.rand(size=(10, d)).double() / 2 ytrain = F(Xtrain) + sigma * torch.randn(size=(Xtrain.size()[0], 1)) - def update(): pass -alphas = [5,10]#,0.01,0.001] -lams_uns = [0.01,0.05,0.1] + + +alphas = [5, 10] # ,0.01,0.001] +lams_uns = [0.01, 0.05, 0.1] # alphas = [0.01] # lams_uns = [0.1] fig, axs = plt.subplots(len(alphas), len(lams_uns)) for index1, alpha in enumerate(alphas): - lams = [la/alpha for la in lams_uns]#, 0.01/alpha]#,16.,32.,64.,128.] + lams = [la / alpha for la in lams_uns] # , 0.01/alpha]#,16.,32.,64.,128.] for index2, lam in enumerate(lams): - print ("Regularizer:", alpha, lam) + print("Regularizer:", alpha, lam) - xtest = interval_torch(n = n,d = 1) + xtest = interval_torch(n=n, d=1) groups = [list(range(m)), list(range(m, 2 * m, 1))] new_groups = groups.copy() weights = [alpha**2 for g in groups] for j in range(len(groups)): for i in range(j + 1, len(groups), 1): new_groups.append(groups[j] + groups[i]) - weights.append(1.) 
+ weights.append(1.0) - regularizer = NestedGroupL1L2Regularizer(lam = lam, groups = new_groups, weights = weights) + regularizer = NestedGroupL1L2Regularizer( + lam=lam, groups=new_groups, weights=weights + ) constraint = regularizer.get_constraint_object(budget) likelihood = GaussianLikelihood(sigma=sigma) - estimator_train = RegularizedDictionary(embedding, likelihood, regularizer, constraints = constraint, use_constraint=True) - - estimator_train.load_data((Xtrain,ytrain)) + estimator_train = RegularizedDictionary( + embedding, + likelihood, + regularizer, + constraints=constraint, + use_constraint=True, + ) + + estimator_train.load_data((Xtrain, ytrain)) estimator_train.fit() mean = estimator_train.mean(xtest) - - - - if max(len(alphas),len(lams_uns))>1: - #axs[index1,index2].subplot(len(lams),len(alphas),index1+1, index2+1) - axs[index1,index2].plot(Xtrain, ytrain, 'ro', ms=15) - axs[index1,index2].plot(xtest, F(xtest), lw = 4) - p = axs[index1,index2].plot(xtest, mean, lw = 4, label = "$\\lambda = "+str(lam)+", \\alpha ="+str(alpha)+" $") + if max(len(alphas), len(lams_uns)) > 1: + # axs[index1,index2].subplot(len(lams),len(alphas),index1+1, index2+1) + axs[index1, index2].plot(Xtrain, ytrain, "ro", ms=15) + axs[index1, index2].plot(xtest, F(xtest), lw=4) + p = axs[index1, index2].plot( + xtest, + mean, + lw=4, + label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $", + ) # xtest1 = torch.linspace(0.0,0.5,n//4).double().view(-1,1) # xtest2 = torch.linspace(-1.0,-0.5,n//4).double().view(-1,1) # conf_xtest = torch.vstack([xtest1,xtest2]) - ucb = estimator_train.ucb(xtest, type = "LR_static") - lcb = estimator_train.lcb(xtest, type = "LR_static") - axs[index1,index2].fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1, color = p[0].get_color()) - #axs[index1,index2].legend(fontsize = 15) + ucb = estimator_train.ucb(xtest, type="LR_static") + lcb = estimator_train.lcb(xtest, type="LR_static") + axs[index1, index2].fill_between( + xtest.view(-1), + lcb.view(-1), + ucb.view(-1), + alpha=0.1, + color=p[0].get_color(), + ) + # axs[index1,index2].legend(fontsize = 15) else: - axs.plot(Xtrain, ytrain, 'ro', ms=15) + axs.plot(Xtrain, ytrain, "ro", ms=15) axs.plot(xtest, F(xtest), lw=4) - p = axs.plot(xtest, mean, lw=4, - label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $") + p = axs.plot( + xtest, + mean, + lw=4, + label="$\\lambda = " + str(lam) + ", \\alpha =" + str(alpha) + " $", + ) # xtest1 = torch.linspace(0.0,0.5,n//4).double().view(-1,1) # xtest2 = torch.linspace(-1.0,-0.5,n//4).double().view(-1,1) # conf_xtest = torch.vstack([xtest1,xtest2]) ucb = estimator_train.ucb(xtest, type="LR_static") lcb = estimator_train.lcb(xtest, type="LR_static") - axs.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha=0.1, - color=p[0].get_color()) - #axs.legend(fontsize=15) -plt.savefig("image.png", dpi = 300) -plt.show() \ No newline at end of file + axs.fill_between( + xtest.view(-1), + lcb.view(-1), + ucb.view(-1), + alpha=0.1, + color=p[0].get_color(), + ) + # axs.legend(fontsize=15) +plt.savefig("image.png", dpi=300) +plt.show() diff --git a/tests/continous_processes/test_estimators/group l_q_estimator.py b/tests/continous_processes/test_estimators/group l_q_estimator.py index 86a72e1..d431303 100644 --- a/tests/continous_processes/test_estimators/group l_q_estimator.py +++ b/tests/continous_processes/test_estimators/group l_q_estimator.py @@ -16,48 +16,48 @@ m = 128 d = 1 sigma = 0.01 -lam = 1. 
+lam = 1.0 n = 256 N = 10 -kernel_object = KernelFunction(gamma = 0.05, d = 1) -#embedding = HermiteEmbedding(m = m, d = 1) -xtest = interval_torch(n = n,d = 1) +kernel_object = KernelFunction(gamma=0.05, d=1) +# embedding = HermiteEmbedding(m = m, d = 1) +xtest = interval_torch(n=n, d=1) -embedding1 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding1.fit_gp(xtest/2-0.5,None) -embedding2 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding2.fit_gp(xtest/2+0.5,None) -embedding = ConcatEmbedding([embedding1,embedding2]) +embedding1 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding1.fit_gp(xtest / 2 - 0.5, None) +embedding2 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding2.fit_gp(xtest / 2 + 0.5, None) +embedding = ConcatEmbedding([embedding1, embedding2]) -qs = [0.01, 0.2,0.5,0.8] +qs = [0.01, 0.2, 0.5, 0.8] groups = [list(range(m)), list(range(m, 2 * m, 1))] -print (groups) +print(groups) regularizers = [] -#regularizers += [L1Regularizer(lam = lam), L2Regularizer(lam = lam)] -#regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs] -regularizers += [GroupNonCovexLqRegularizer(lam = lam, q = q, groups=groups) for q in qs] +# regularizers += [L1Regularizer(lam = lam), L2Regularizer(lam = lam)] +# regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs] +regularizers += [GroupNonCovexLqRegularizer(lam=lam, q=q, groups=groups) for q in qs] likelihood = GaussianLikelihood(sigma=sigma) names = [] -#names += ["L1", "L2"] -#names += ["L"+str(q) for q in qs] -names += ["group L"+str(q) for q in qs] +# names += ["L1", "L2"] +# names += ["L"+str(q) for q in qs] +names += ["group L" + str(q) for q in qs] -f = lambda x: torch.sin(x*20)*(x>0).double() -Xtrain = interval_torch(n = N, d= 1) +f = lambda x: torch.sin(x * 20) * (x > 0).double() +Xtrain = interval_torch(n=N, d=1) ytrain = f(Xtrain) -for name,regularizer in zip(names,regularizers): +for name, regularizer in zip(names, regularizers): estimator = RegularizedDictionary(embedding, likelihood, regularizer) - estimator.load_data((Xtrain,ytrain)) + estimator.load_data((Xtrain, ytrain)) estimator.fit() mean = estimator.mean(xtest) print(name, "support:", torch.sum(estimator.theta_fit > 1e-8)) - plt.plot(xtest, mean, label = name, lw = 3, alpha = 0.5) + plt.plot(xtest, mean, label=name, lw=3, alpha=0.5) -plt.plot(Xtrain,ytrain,'ko', lw = 3) -plt.plot(xtest,f(xtest),'k--', lw = 3) +plt.plot(Xtrain, ytrain, "ko", lw=3) +plt.plot(xtest, f(xtest), "k--", lw=3) plt.legend() -plt.show() \ No newline at end of file +plt.show() diff --git a/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py b/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py index c22ffc5..0798487 100644 --- a/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py +++ b/tests/continous_processes/test_estimators/group_l_q_estimator_budget.py @@ -6,6 +6,7 @@ from stpy.kernels import KernelFunction from stpy.regularization.simplex_regularizer import SupRegularizer from stpy.continuous_processes.mkl_estimator import MultipleKernelLearner + """ This script test and compares Lq estimators compare L1, L2 and Lq estimators @@ -14,35 +15,41 @@ m = 128 d = 1 sigma = 0.01 -lam = 1. 
+lam = 1.0 n = 128 N = 10 -kernel_object = KernelFunction(gamma = 0.05, d = 1) -#embedding = HermiteEmbedding(m = m, d = 1) -xtest = interval_torch(n = n,d = 1) +kernel_object = KernelFunction(gamma=0.05, d=1) +# embedding = HermiteEmbedding(m = m, d = 1) +xtest = interval_torch(n=n, d=1) + +embedding1 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding1.fit_gp(xtest / 2 - 0.7, None) +embedding2 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding2.fit_gp(xtest / 2 + 0.7, None) +embedding = ConcatEmbedding([embedding1, embedding2]) + + +def k1(x, y, **kwagrs): + return (embedding1.embed(x) @ embedding1.embed(y).T).T -embedding1 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding1.fit_gp(xtest/2-0.7,None) -embedding2 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding2.fit_gp(xtest/2+0.7,None) -embedding = ConcatEmbedding([embedding1,embedding2]) -def k1(x,y,**kwagrs): - return (embedding1.embed(x)@embedding1.embed(y).T).T +def k2(x, y, **kwagrs): + return (embedding2.embed(x) @ embedding2.embed(y).T).T -def k2(x,y,**kwagrs): - return (embedding2.embed(x)@embedding2.embed(y).T).T -kernel_object_1 = KernelFunction(kernel_function = k1) -kernel_object_2 = KernelFunction(kernel_function = k2) +kernel_object_1 = KernelFunction(kernel_function=k1) +kernel_object_2 = KernelFunction(kernel_function=k2) kernels = [kernel_object_1, kernel_object_2] regularizer = SupRegularizer(d=len(kernels), lam=0.99, constrained=True) mkl = MultipleKernelLearner(kernels, regularizer=regularizer) -f = lambda x: torch.sin(x*20)*(x<0).double() + (1e-5)*torch.sin(x*20)*(x>0).double() -Xtrain = interval_torch(n = N, d= 1, L_infinity_ball=0.25) - 0.75 +f = ( + lambda x: torch.sin(x * 20) * (x < 0).double() + + (1e-5) * torch.sin(x * 20) * (x > 0).double() +) +Xtrain = interval_torch(n=N, d=1, L_infinity_ball=0.25) - 0.75 ytrain = f(Xtrain) # @@ -85,9 +92,9 @@ def k2(x,y,**kwagrs): mkl.load_data((Xtrain, ytrain)) mkl.fit() mean = mkl.mean(xtest) -p = plt.plot(xtest, mean, label="MKL", linestyle="-", lw=3, color='tab:purple') +p = plt.plot(xtest, mean, label="MKL", linestyle="-", lw=3, color="tab:purple") -plt.plot(Xtrain,ytrain,'ko', lw = 3) -plt.plot(xtest,f(xtest),'k--', lw = 3) +plt.plot(Xtrain, ytrain, "ko", lw=3) +plt.plot(xtest, f(xtest), "k--", lw=3) plt.legend() -plt.show() \ No newline at end of file +plt.show() diff --git a/tests/continous_processes/test_estimators/l_q_estimator.py b/tests/continous_processes/test_estimators/l_q_estimator.py index f04484d..931aed5 100644 --- a/tests/continous_processes/test_estimators/l_q_estimator.py +++ b/tests/continous_processes/test_estimators/l_q_estimator.py @@ -16,54 +16,69 @@ m = 64 d = 1 sigma = 0.01 -lam = 1. +lam = 1.0 n = 4 N = 3 -total_norm = 1. 
-xtest = interval_torch(n = n,d = 1) -kernel_object = KernelFunction(gamma = 0.05, d = 1) -embedding = HermiteEmbedding(m = m, d = 1) +total_norm = 1.0 +xtest = interval_torch(n=n, d=1) +kernel_object = KernelFunction(gamma=0.05, d=1) +embedding = HermiteEmbedding(m=m, d=1) total_norms = [1] -for pos,total_norm in enumerate(total_norms): - lasso_regularizer = L1Regularizer(lam = lam) - l2_regularizer = L2Regularizer(lam = lam) +for pos, total_norm in enumerate(total_norms): + lasso_regularizer = L1Regularizer(lam=lam) + l2_regularizer = L2Regularizer(lam=lam) qs = [0.1] - regularizers = [l2_regularizer,l2_regularizer] - #regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs] - constraints = [lasso_regularizer.get_constraint_object(total_norm), l2_regularizer.get_constraint_object(total_norm)] - #constraints=+ [None for q in qs] + regularizers = [l2_regularizer, l2_regularizer] + # regularizers += [NonConvexLqRegularizer(lam = lam, q = q) for q in qs] + constraints = [ + lasso_regularizer.get_constraint_object(total_norm), + l2_regularizer.get_constraint_object(total_norm), + ] + # constraints=+ [None for q in qs] - - constraints += [ NonConvexNormConstraint(0.5, total_norm, m)] - regularizers += [L2Regularizer(lam = lam)] + constraints += [NonConvexNormConstraint(0.5, total_norm, m)] + regularizers += [L2Regularizer(lam=lam)] likelihood = GaussianLikelihood(sigma=sigma) names = ["L1", "L2"] - #names += ["L"+str(q) for q in qs] + # names += ["L"+str(q) for q in qs] names += ["Lspecial"] - f = lambda x: torch.sin(x*20) - Xtrain = interval_torch(n = N, d= 1) + f = lambda x: torch.sin(x * 20) + Xtrain = interval_torch(n=N, d=1) ytrain = f(Xtrain) - linestyles = ['-.','-','--'] - #plt.subplot(2,len(total_norms)//2,pos+1) - for name,regularizer,constraint, linestyle in zip(names,regularizers,constraints,linestyles): - print (name) - estimator = RegularizedDictionary(embedding, likelihood, regularizer, constraints=constraint, use_constraint=True) - estimator.load_data((Xtrain,ytrain)) + linestyles = ["-.", "-", "--"] + # plt.subplot(2,len(total_norms)//2,pos+1) + for name, regularizer, constraint, linestyle in zip( + names, regularizers, constraints, linestyles + ): + print(name) + estimator = RegularizedDictionary( + embedding, + likelihood, + regularizer, + constraints=constraint, + use_constraint=True, + ) + estimator.load_data((Xtrain, ytrain)) estimator.fit() mean = estimator.mean(xtest) lcb = estimator.lcb(xtest) ucb = estimator.ucb(xtest) - p = plt.plot(xtest, mean, label=name, linestyle = linestyle) - plt.fill_between(xtest.view(-1),lcb.view(-1),ucb.view(-1), alpha = 0.1, color = p[0].get_color()) + p = plt.plot(xtest, mean, label=name, linestyle=linestyle) + plt.fill_between( + xtest.view(-1), + lcb.view(-1), + ucb.view(-1), + alpha=0.1, + color=p[0].get_color(), + ) print(name, "support:", torch.sum(estimator.theta_fit > 0.01)) - print (estimator.theta_fit.T) - + print(estimator.theta_fit.T) - plt.plot(Xtrain,ytrain,'o') - plt.plot(xtest,f(xtest),'k--') + plt.plot(Xtrain, ytrain, "o") + plt.plot(xtest, f(xtest), "k--") plt.legend() -plt.show() \ No newline at end of file +plt.show() diff --git a/tests/continous_processes/test_estimators/qff_nonstationary.py b/tests/continous_processes/test_estimators/qff_nonstationary.py index 19407fc..576f73c 100644 --- a/tests/continous_processes/test_estimators/qff_nonstationary.py +++ b/tests/continous_processes/test_estimators/qff_nonstationary.py @@ -12,92 +12,105 @@ m = 128 d = 1 sigma = 0.01 -lam = 1. 
+lam = 1.0 n = 256 n_small = 16 I = torch.eye(m).double() budget = 100 -kernel_object = KernelFunction(gamma = 0.05, d = 1) +kernel_object = KernelFunction(gamma=0.05, d=1) -embedding_base = HermiteEmbedding(m = m, d = 1) +embedding_base = HermiteEmbedding(m=m, d=1) -def zero_out_interval(x,interval): - mask1 = x[:,0] > interval[0] - mask2 = x[:,0] < interval[1] - #return torch.from_numpy(gaussian_filter(torch.logical_and(mask1,mask2).double(),sigma=10)) - return torch.logical_and(mask1,mask2).double() +def zero_out_interval(x, interval): + mask1 = x[:, 0] > interval[0] + mask2 = x[:, 0] < interval[1] + # return torch.from_numpy(gaussian_filter(torch.logical_and(mask1,mask2).double(),sigma=10)) + return torch.logical_and(mask1, mask2).double() -xtest = interval_torch(n = n,d = 1) +xtest = interval_torch(n=n, d=1) -embedding1 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding1.fit_gp((xtest-1)/2-0.5,None) -embedding2 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding2.fit_gp((xtest-1)/2,None) -embedding3 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding3.fit_gp((xtest+1)/2,None) -embedding4 = NystromFeatures(kernel_object = kernel_object, m = m ) -embedding4.fit_gp((xtest+1)/2+0.5,None) -embedding = ConcatEmbedding([embedding1,embedding2,embedding3,embedding4]) +embedding1 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding1.fit_gp((xtest - 1) / 2 - 0.5, None) +embedding2 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding2.fit_gp((xtest - 1) / 2, None) +embedding3 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding3.fit_gp((xtest + 1) / 2, None) +embedding4 = NystromFeatures(kernel_object=kernel_object, m=m) +embedding4.fit_gp((xtest + 1) / 2 + 0.5, None) -likelihood_base = GaussianLikelihood(sigma = sigma) -groups = [list(range(m)),list(range(m,2*m,1)),list(range(2*m,3*m,1)),list(range(3*m,4*m,1))] +embedding = ConcatEmbedding([embedding1, embedding2, embedding3, embedding4]) -regularizer_base = GroupL1L2Regularizer(lam=1., groups=groups) +likelihood_base = GaussianLikelihood(sigma=sigma) +groups = [ + list(range(m)), + list(range(m, 2 * m, 1)), + list(range(2 * m, 3 * m, 1)), + list(range(3 * m, 4 * m, 1)), +] + +regularizer_base = GroupL1L2Regularizer(lam=1.0, groups=groups) constraint_base = regularizer_base.get_constraint_object(budget) -estimator = RegularizedDictionary(embedding, likelihood_base, regularizer_base, constraints=constraint_base, use_constraint=False) +estimator = RegularizedDictionary( + embedding, + likelihood_base, + regularizer_base, + constraints=constraint_base, + use_constraint=False, +) -lams = [1.]#,16.,32.,64.,128.] +lams = [1.0] # ,16.,32.,64.,128.] N = 3 -v = torch.randn(size = (embedding.get_m(),1)).double() -for i in [0,1,3]: - v[groups[i]] = 0. 
-v = (v/np.sqrt(regularizer_base.eval(v))) +v = torch.randn(size=(embedding.get_m(), 1)).double() +for i in [0, 1, 3]: + v[groups[i]] = 0.0 +v = v / np.sqrt(regularizer_base.eval(v)) -F = lambda X: embedding.embed(X)@v*np.sqrt(budget) -X = torch.rand(size = (10,d)).double()*0.25+0.1 +F = lambda X: embedding.embed(X) @ v * np.sqrt(budget) +X = torch.rand(size=(10, d)).double() * 0.25 + 0.1 y = F(X) -#Xpoint = torch.Tensor([[0.],[0.5]]).double() -#ypoint = torch.Tensor([[0.],[0.]]).double() +# Xpoint = torch.Tensor([[0.],[0.5]]).double() +# ypoint = torch.Tensor([[0.],[0.]]).double() -#X = torch.vstack([X,Xpoint]) -#y = torch.vstack([y,ypoint]) -estimator.load_data((X,y)) +# X = torch.vstack([X,Xpoint]) +# y = torch.vstack([y,ypoint]) +estimator.load_data((X, y)) estimator.fit() F = lambda X: estimator.mean(X) Xtrain = torch.rand(size=(N, d)).double() * 0.5 ytrain = F(Xtrain) + sigma * torch.randn(size=(Xtrain.size()[0], 1)) -lams = [8.,16.,32.]#,16.,32.,64.,128.] +lams = [8.0, 16.0, 32.0] # ,16.,32.,64.,128.] ##lams = [1.,128.] epsilon = 1e-1 -#lams = [1.] +# lams = [1.] for index, lam in enumerate(lams): - print (index,':',lam) - print ("budget:",budget) + print(index, ":", lam) + print("budget:", budget) - plt.subplot(len(lams),1,index+1) - plt.plot(Xtrain, ytrain, 'ro', ms=25) - plt.ylim([-3,3]) - regularizer = GroupL1L2Regularizer(lam = lam, groups = groups) + plt.subplot(len(lams), 1, index + 1) + plt.plot(Xtrain, ytrain, "ro", ms=25) + plt.ylim([-3, 3]) + regularizer = GroupL1L2Regularizer(lam=lam, groups=groups) constraint = regularizer.get_constraint_object(budget) likelihood = GaussianLikelihood(sigma=sigma) - estimator_train = RegularizedDictionary(embedding, likelihood, regularizer, constraints = constraint, use_constraint=True) - + estimator_train = RegularizedDictionary( + embedding, likelihood, regularizer, constraints=constraint, use_constraint=True + ) - xtest = interval_torch(n = n,d = 1) - xtest_small = interval_torch(n = n_small, d = 1) - plt.plot(xtest, F(xtest), lw = 5) + xtest = interval_torch(n=n, d=1) + xtest_small = interval_torch(n=n_small, d=1) + plt.plot(xtest, F(xtest), lw=5) - estimator_train.load_data((Xtrain,ytrain)) + estimator_train.load_data((Xtrain, ytrain)) estimator_train.fit() mean = estimator_train.mean(xtest) @@ -107,15 +120,21 @@ def zero_out_interval(x,interval): print(regularizer.eval(estimator_train.theta_fit)) print(regularizer_base.eval(estimator_train.theta_fit)) - p = plt.plot(xtest, mean, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$") - #p2 = plt.plot(xtest_small, mean_small,'o-', ms = 25, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$") + p = plt.plot(xtest, mean, lw=4, label="$||f|| \leq " + str(budget / lam) + "$") + # p2 = plt.plot(xtest_small, mean_small,'o-', ms = 25, lw = 4, label = "$||f|| \leq "+str(budget/lam)+"$") # - ucb = estimator_train.ucb(xtest_small, type = "LR_static") - lcb = estimator_train.lcb(xtest_small, type = "LR_static") + ucb = estimator_train.ucb(xtest_small, type="LR_static") + lcb = estimator_train.lcb(xtest_small, type="LR_static") # - #plt.errorbar(xtest_small.view(-1), mean_small.view(-1),yerr = ucb.view(-1), ms = 25,alpha = 1., color = p[0].get_color(), lw=5) - plt.fill_between(xtest_small.view(-1),lcb.view(-1), ucb.view(-1),alpha = 0.1, color = p[0].get_color()) - plt.plot(xtest, xtest*0 + epsilon, 'k--') - plt.legend(fontsize = 35) + # plt.errorbar(xtest_small.view(-1), mean_small.view(-1),yerr = ucb.view(-1), ms = 25,alpha = 1., color = p[0].get_color(), lw=5) + plt.fill_between( + 
xtest_small.view(-1), + lcb.view(-1), + ucb.view(-1), + alpha=0.1, + color=p[0].get_color(), + ) + plt.plot(xtest, xtest * 0 + epsilon, "k--") + plt.legend(fontsize=35) plt.show() diff --git a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py index 82467ad..640468b 100644 --- a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py +++ b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py @@ -8,73 +8,99 @@ from stpy.probability.gaussian_likelihood import GaussianLikelihood from stpy.regularization.regularizer import L2Regularizer, L1Regularizer from stpy.helpers.helper import interval_torch -from stpy.regularization.constraints import QuadraticInequalityConstraint, AbsoluteValueConstraint +from stpy.regularization.constraints import ( + QuadraticInequalityConstraint, + AbsoluteValueConstraint, +) from stpy.kernels import KernelFunction m = 64 d = 1 sigma = 0.1 -lam = 1. +lam = 1.0 n = 256 I = torch.eye(m).double() -budget = m*10e10 -kernel_object = KernelFunction(gamma = 0.1, d = 1) -#embedding = TriangleEmbedding(m = m, d = 1, B = 10, b = -10, kernel_object=kernel_object) +budget = m * 10e10 +kernel_object = KernelFunction(gamma=0.1, d=1) +# embedding = TriangleEmbedding(m = m, d = 1, B = 10, b = -10, kernel_object=kernel_object) -embedding_base = FaberSchauderEmbedding(m = m, d = 1, kernel_object=None, offset=0) -# this defines the decay of the functions -def decay_function(emb): - return (emb.hierarchical_mask()+1)**(-15) +embedding_base = FaberSchauderEmbedding(m=m, d=1, kernel_object=None, offset=0) -print (decay_function(embedding_base)) -embedding = WeightedEmbedding(embedding_base,weight_function=decay_function) - -# embedding = RFFEmbeddQing(m = m, d=1, gamma = 0.1) +# this defines the decay of the functions +def decay_function(emb): + return (emb.hierarchical_mask() + 1) ** (-15) -likelihood = GaussianLikelihood(sigma = sigma) -regularizer_L2 = L2Regularizer(lam = lam) -regularizer_L1 = L1Regularizer(lam = lam) +print(decay_function(embedding_base)) -constraint_L2 = QuadraticInequalityConstraint(Q = I, c = budget) -constraint_L1 = AbsoluteValueConstraint(c = np.sqrt(budget)) +embedding = WeightedEmbedding(embedding_base, weight_function=decay_function) -estimator_L2_L2 = RegularizedDictionary(embedding, likelihood, regularizer_L2, - constraints = constraint_L2, use_constraint=False) -estimator_L1_L2 = RegularizedDictionary(embedding, likelihood, regularizer_L1, - constraints = constraint_L2, use_constraint=False) -estimator_L2_L1 = RegularizedDictionary(embedding, likelihood, regularizer_L2, - constraints = constraint_L1, use_constraint=False) -estimator_L1_L1 = RegularizedDictionary(embedding, likelihood, regularizer_L1, - constraints = constraint_L1, use_constraint=False) +# embedding = RFFEmbeddQing(m = m, d=1, gamma = 0.1) -estimators = [estimator_L2_L2,estimator_L2_L1,estimator_L1_L2,estimator_L1_L1] +likelihood = GaussianLikelihood(sigma=sigma) +regularizer_L2 = L2Regularizer(lam=lam) +regularizer_L1 = L1Regularizer(lam=lam) + + +constraint_L2 = QuadraticInequalityConstraint(Q=I, c=budget) +constraint_L1 = AbsoluteValueConstraint(c=np.sqrt(budget)) + +estimator_L2_L2 = RegularizedDictionary( + embedding, + likelihood, + regularizer_L2, + constraints=constraint_L2, + use_constraint=False, +) +estimator_L1_L2 = RegularizedDictionary( + embedding, + likelihood, + regularizer_L1, + constraints=constraint_L2, + use_constraint=False, 
+) +estimator_L2_L1 = RegularizedDictionary( + embedding, + likelihood, + regularizer_L2, + constraints=constraint_L1, + use_constraint=False, +) +estimator_L1_L1 = RegularizedDictionary( + embedding, + likelihood, + regularizer_L1, + constraints=constraint_L1, + use_constraint=False, +) + +estimators = [estimator_L2_L2, estimator_L2_L1, estimator_L1_L2, estimator_L1_L1] names = ["reg:L2 con:L2", "reg:L2 con:L1", "reg:L1 con:L2", "reg:L1 con:L1"] -styles = ["-","--","-","--"] +styles = ["-", "--", "-", "--"] N = 1 -v = torch.randn(size = (m,1)).double() -F = lambda X: embedding.embed(X)@v +v = torch.randn(size=(m, 1)).double() +F = lambda X: embedding.embed(X) @ v X = torch.Tensor([[0.5]]).double() y = F(X) -xtest = interval_torch(n = n,d = 1) +xtest = interval_torch(n=n, d=1) -plt.plot(xtest, F(xtest), lw = 5) -plt.plot(X, y, 'ro', ms = 25) +plt.plot(xtest, F(xtest), lw=5) +plt.plot(X, y, "ro", ms=25) -for j,estimator in enumerate(estimators): - print ("Calculating:",names[j]) - estimator.load_data((X,y)) +for j, estimator in enumerate(estimators): + print("Calculating:", names[j]) + estimator.load_data((X, y)) estimator.fit() mean = estimator.mean(xtest) - #ucb = estimator.ucb(xtest, type = "LR_static") - #lcb = estimator.lcb(xtest, type = "LR_static") + # ucb = estimator.ucb(xtest, type = "LR_static") + # lcb = estimator.lcb(xtest, type = "LR_static") - #plt.title("Norm: "+str(torch.norm(estimator.theta_fit)**2)) - plt.plot(xtest, mean, label = names[j], lw = 4, linestyle = styles[j]) - #plt.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1) + # plt.title("Norm: "+str(torch.norm(estimator.theta_fit)**2)) + plt.plot(xtest, mean, label=names[j], lw=4, linestyle=styles[j]) + # plt.fill_between(xtest.view(-1), lcb.view(-1), ucb.view(-1), alpha = 0.1) -plt.legend(fontsize = 35) +plt.legend(fontsize=35) plt.show() diff --git a/tests/continous_processes/test_huber_loss.py b/tests/continous_processes/test_huber_loss.py index c76de41..edf105e 100644 --- a/tests/continous_processes/test_huber_loss.py +++ b/tests/continous_processes/test_huber_loss.py @@ -10,77 +10,97 @@ d = 1 eps = 0.01 s = 1 -x = torch.rand(N,d).double()*2 - 1 -xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1)) +x = torch.rand(N, d).double() * 2 - 1 +xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1)) # true GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d) ytest = GP_true.sample(xtest) -GP_true.fit_gp(xtest,ytest) +GP_true.fit_gp(xtest, ytest) -plt.plot(xtest,GP_true.mean(xtest),'b-') +plt.plot(xtest, GP_true.mean(xtest), "b-") y = GP_true.mean(x).clone() -GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=0.01, s = s) +GP = GaussianProcess( + gamma=0.1, + kernel_name="squared_exponential", + d=d, + loss="huber", + huber_delta=0.01, + s=s, +) -xnew = x[0,:].view(1,1) + eps -ynew = y[0,0].view(1,1) + 1 +xnew = x[0, :].view(1, 1) + eps +ynew = y[0, 0].view(1, 1) + 1 -y2 = torch.vstack([y,ynew]) -x2 = torch.vstack([x,xnew]) +y2 = torch.vstack([y, ynew]) +x2 = torch.vstack([x, xnew]) -GP.fit_gp(x2,y2) +GP.fit_gp(x2, y2) GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d) GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d) -GP2.fit_gp(x2,y2) -#GP2.visualize(xtest, show = False, fig = False) -#plt.show() - +GP2.fit_gp(x2, y2) +# GP2.visualize(xtest, show = False, fig = False) +# plt.show() ### marginalized likelihood with normal loss_two_ways # plot true function 
-plt.plot(xtest,GP_true.mean(xtest),'b--',label = "truth", lw = 3) +plt.plot(xtest, GP_true.mean(xtest), "b--", label="truth", lw=3) # with noise optimize -GP2.fit_gp(x2,y2) -GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) +GP2.fit_gp(x2, y2) +GP2.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) mu = GP2.mean(xtest) -plt.plot(xtest,mu, 'r-', label = "squared-corupted", lw = 3) -#GP2.visualize(xtest, show = False, fig = False, size = 0) +plt.plot(xtest, mu, "r-", label="squared-corupted", lw=3) +# GP2.visualize(xtest, show = False, fig = False, size = 0) # no noise optimize -GP2.fit_gp(x,y) -GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) +GP2.fit_gp(x, y) +GP2.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) mu = GP2.mean(xtest) -plt.plot(xtest,mu, '--x', color ="tab:brown" , label = 'squared-uncorrupted', lw = 3) +plt.plot(xtest, mu, "--x", color="tab:brown", label="squared-uncorrupted", lw=3) # with huber optimize -GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3) -GP.fit_gp(x2,y2) -GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1., weight=1.) +GP = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, loss="huber", huber_delta=1.3 +) +GP.fit_gp(x2, y2) +GP.optimize_params( + type="bandwidth", + restarts=5, + verbose=False, + optimizer="pytorch-minimize", + scale=1.0, + weight=1.0, +) mu = GP2.mean(xtest) -plt.plot(xtest,mu, color = "tab:green", label = 'huber-corupted', lw = 3) +plt.plot(xtest, mu, color="tab:green", label="huber-corupted", lw=3) # GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3) # GP.fit_gp(x2,y2) # mu = GP2.mean(xtest) # plt.plot(xtest,mu, 'r-', label = 'huber-true-model-corupted') -GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'huber', huber_delta=1.3) -GP.fit_gp(x,y) +GP = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, loss="huber", huber_delta=1.3 +) +GP.fit_gp(x, y) mu = GP.mean(xtest) -plt.plot(xtest,mu, '--', color = "tab:orange", label = 'huber-uncorrupted', lw = 3) +plt.plot(xtest, mu, "--", color="tab:orange", label="huber-uncorrupted", lw=3) plt.legend() -plt.plot(x,y, 'ro', ms = 5) +plt.plot(x, y, "ro", ms=5) -plt.plot(xnew,ynew, 'ko', ms = 10) +plt.plot(xnew, ynew, "ko", ms=10) plt.show() # GP.fit_gp(x,y2) # GP.optimize_params(type="bandwidth", restarts=10, verbose = False, optimizer = 'pytorch-minimize', scale = 10.) 
# GP.visualize(xtest, show = True, fig = False, color = 'yellow') # - diff --git a/tests/continous_processes/test_marginalized_pytorch_minimize.py b/tests/continous_processes/test_marginalized_pytorch_minimize.py index 8e97e47..beaeea0 100644 --- a/tests/continous_processes/test_marginalized_pytorch_minimize.py +++ b/tests/continous_processes/test_marginalized_pytorch_minimize.py @@ -3,27 +3,29 @@ from stpy.kernels import KernelFunction from stpy.helpers.helper import interval -#%% +# %% n = 100 d = 2 -x = torch.rand(n,d).double()*2 - 1 -xtest = torch.from_numpy(interval(50,2,L_infinity_ball=1)) +x = torch.rand(n, d).double() * 2 - 1 +xtest = torch.from_numpy(interval(50, 2, L_infinity_ball=1)) -#%% +# %% GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=2) y = GP.sample(x) -GP.fit_gp(x,y) -GP.visualize_contour(xtest, ms = 10) +GP.fit_gp(x, y) +GP.visualize_contour(xtest, ms=10) -#%% +# %% ## Kernels can be defined as via kernel object # 2 dimensional additive kernel with groups [0] and [1] -k = KernelFunction(kernel_name = "ard", d = 2, groups = [[0,1]] ) +k = KernelFunction(kernel_name="ard", d=2, groups=[[0, 1]]) GP = GaussianProcess(kernel=k) -GP.fit_gp(x,y) -GP.optimize_params(type="bandwidth", restarts = 2, verbose = False, optimizer = 'pytorch-minimize') -GP.visualize_contour(xtest, ms = 10) +GP.fit_gp(x, y) +GP.optimize_params( + type="bandwidth", restarts=2, verbose=False, optimizer="pytorch-minimize" +) +GP.visualize_contour(xtest, ms=10) diff --git a/tests/continous_processes/test_svr_loss.py b/tests/continous_processes/test_svr_loss.py index 4844b98..9d9d124 100644 --- a/tests/continous_processes/test_svr_loss.py +++ b/tests/continous_processes/test_svr_loss.py @@ -11,87 +11,102 @@ eps = 0.01 s = 0.1 B = 0.001 -x = torch.rand(N,d).double()*2 - 1 -xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1)) +x = torch.rand(N, d).double() * 2 - 1 +xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1)) # true GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d) ytest = GP_true.sample(xtest) -GP_true.fit_gp(xtest,ytest) - +GP_true.fit_gp(xtest, ytest) y = GP_true.mean(x).clone() -xnew = x[0,:].view(1,1) + eps -ynew = torch.rand(size = (1,1))*B -y2 = torch.vstack([y,ynew]) -x2 = torch.vstack([x,xnew]) - -GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, loss = 'svr', svr_eps=0.1, s = s) -GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "squared") -GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'unif', B = B) -GP4 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'huber', huber_delta=1.35) - -GP.fit_gp(x,y) -GP2.fit_gp(x,y) -GP3.fit_gp(x,y) -GP4.fit_gp(x,y) - -plt.plot(x,y, 'ro', label = 'points') -plt.plot(xtest, ytest, 'b-', label = "truth") - -plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr") -plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared") -plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif") - -plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber") +xnew = x[0, :].view(1, 1) + eps +ynew = torch.rand(size=(1, 1)) * B +y2 = torch.vstack([y, ynew]) +x2 = torch.vstack([x, xnew]) + +GP = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, loss="svr", svr_eps=0.1, s=s +) +GP2 = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="squared" +) +GP3 = GaussianProcess( + gamma=0.1, 
kernel_name="squared_exponential", d=d, s=s, loss="unif", B=B +) +GP4 = GaussianProcess( + gamma=0.1, + kernel_name="squared_exponential", + d=d, + s=s, + loss="huber", + huber_delta=1.35, +) + +GP.fit_gp(x, y) +GP2.fit_gp(x, y) +GP3.fit_gp(x, y) +GP4.fit_gp(x, y) + +plt.plot(x, y, "ro", label="points") +plt.plot(xtest, ytest, "b-", label="truth") + +plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr") +plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared") +plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif") + +plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber") plt.legend() plt.show() -GP.fit_gp(x2,y2) -GP2.fit_gp(x2,y2) -GP3.fit_gp(x2,y2) -GP4.fit_gp(x2,y2) +GP.fit_gp(x2, y2) +GP2.fit_gp(x2, y2) +GP3.fit_gp(x2, y2) +GP4.fit_gp(x2, y2) -plt.plot(x,y, 'ro', label = 'points') -plt.plot(xnew,ynew, 'ko', label = 'corrupted') -plt.plot(xtest, ytest, 'b-', label = "truth") +plt.plot(x, y, "ro", label="points") +plt.plot(xnew, ynew, "ko", label="corrupted") +plt.plot(xtest, ytest, "b-", label="truth") -plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr") -plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared") -plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif") -plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber") +plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr") +plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared") +plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif") +plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber") plt.legend() plt.show() - - -GP.fit_gp(x2,y2) -GP2.fit_gp(x2,y2) -GP3.fit_gp(x2,y2) -GP4.fit_gp(x2,y2) - -plt.plot(x,y, 'ro', label = 'points') -plt.plot(xnew,ynew, 'ko', label = 'corrupted') -plt.plot(xtest, ytest, 'b-', label = "truth") - -GP.fit_gp(x2,y2) -GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) -GP2.fit_gp(x2,y2) -GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) -GP3.fit_gp(x2,y2) -#GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) -GP4.fit_gp(x2,y2) -GP4.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) - -plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "svr") -plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared") -plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif") -plt.plot(xtest, GP4.mean_std(xtest)[0], '--', color = "orange", label = "huber") - +GP.fit_gp(x2, y2) +GP2.fit_gp(x2, y2) +GP3.fit_gp(x2, y2) +GP4.fit_gp(x2, y2) + +plt.plot(x, y, "ro", label="points") +plt.plot(xnew, ynew, "ko", label="corrupted") +plt.plot(xtest, ytest, "b-", label="truth") + +GP.fit_gp(x2, y2) +GP.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) +GP2.fit_gp(x2, y2) +GP2.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) +GP3.fit_gp(x2, y2) +# GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) 
+GP4.fit_gp(x2, y2) +GP4.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) + +plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="svr") +plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared") +plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif") +plt.plot(xtest, GP4.mean_std(xtest)[0], "--", color="orange", label="huber") plt.legend() diff --git a/tests/continous_processes/test_unif_marginalized.py b/tests/continous_processes/test_unif_marginalized.py index a27c508..0369049 100644 --- a/tests/continous_processes/test_unif_marginalized.py +++ b/tests/continous_processes/test_unif_marginalized.py @@ -12,37 +12,45 @@ eps = 0.01 s = 0.1 B = 0.001 -x = torch.rand(N,d).double()*2 - 1 -xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1)) +x = torch.rand(N, d).double() * 2 - 1 +xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1)) # true GP_true = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d) ytest = GP_true.sample(xtest) -GP_true.fit_gp(xtest,ytest) - +GP_true.fit_gp(xtest, ytest) y = GP_true.mean(x).clone() -xnew = x[0,:].view(1,1) + eps -ynew = torch.rand(size = (1,1))*B -y2 = torch.vstack([y,ynew]) -x2 = torch.vstack([x,xnew]) - -GP = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = 'huber') -GP2 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "squared") -GP3 = GaussianProcess(gamma=0.1, kernel_name="squared_exponential", d=d, s = s, loss = "unif_new") - - - -GP.fit_gp(x2,y2) -GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) -GP2.fit_gp(x2,y2) -#GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) -GP3.fit_gp(x2,y2) -GP3.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) - -plt.plot(xtest, GP.mean_std(xtest)[0], 'g-', label = "huber") -plt.plot(xtest, GP2.mean_std(xtest)[0], 'r-', label = "squared") -plt.plot(xtest, GP3.mean_std(xtest)[0], 'y-', label = "unif") +xnew = x[0, :].view(1, 1) + eps +ynew = torch.rand(size=(1, 1)) * B +y2 = torch.vstack([y, ynew]) +x2 = torch.vstack([x, xnew]) + +GP = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="huber" +) +GP2 = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="squared" +) +GP3 = GaussianProcess( + gamma=0.1, kernel_name="squared_exponential", d=d, s=s, loss="unif_new" +) + + +GP.fit_gp(x2, y2) +GP.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) +GP2.fit_gp(x2, y2) +# GP2.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1.) 
+GP3.fit_gp(x2, y2) +GP3.optimize_params( + type="bandwidth", restarts=5, verbose=False, optimizer="pytorch-minimize", scale=1.0 +) + +plt.plot(xtest, GP.mean_std(xtest)[0], "g-", label="huber") +plt.plot(xtest, GP2.mean_std(xtest)[0], "r-", label="squared") +plt.plot(xtest, GP3.mean_std(xtest)[0], "y-", label="unif") plt.legend() -plt.show() \ No newline at end of file +plt.show() diff --git a/tests/convergence_test.py b/tests/convergence_test.py index ae65243..b630a63 100755 --- a/tests/convergence_test.py +++ b/tests/convergence_test.py @@ -15,30 +15,53 @@ # number of intial points N = 3 # smoothness -gamma = torch.ones(d, dtype= torch.float64)*1 +gamma = torch.ones(d, dtype=torch.float64) * 1 # test problem xtest = torch.from_numpy(interval(n, d)) x = torch.from_numpy(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) -f = lambda q: f_no_noise(q) + torch.normal(mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1., - out=None) * s +f = ( + lambda q: f_no_noise(q) + + torch.normal( + mean=torch.zeros(q.size()[0], 1, dtype=torch.float64), std=1.0, out=None + ) + * s +) # targets y = f(x) # GP model with squared exponential m = 12 groups = None -GP = GaussianProcess(kernel = "squared_exponential", s=s, gamma = gamma[0], d=d, groups = groups) -GP_KL = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma[0], groups=groups, approx="kl") -GP_He = GaussianProcessFF(kernel="squared_exponential", s=s, m=m, d=d, gamma=gamma[0], groups=groups, approx="hermite") +GP = GaussianProcess( + kernel="squared_exponential", s=s, gamma=gamma[0], d=d, groups=groups +) +GP_KL = GaussianProcessFF( + kernel="squared_exponential", + s=s, + m=m, + d=d, + gamma=gamma[0], + groups=groups, + approx="kl", +) +GP_He = GaussianProcessFF( + kernel="squared_exponential", + s=s, + m=m, + d=d, + gamma=gamma[0], + groups=groups, + approx="hermite", +) # fit GP GP.fit_gp(x, y) GP_KL.fit_gp(x, y) GP_He.fit_gp(x, y) -print (GP.K) -print (GP_KL.right_kernel()) -print (GP_He.right_kernel()) \ No newline at end of file +print(GP.K) +print(GP_KL.right_kernel()) +print(GP_He.right_kernel()) diff --git a/tests/cvxopt_integer_test.py b/tests/cvxopt_integer_test.py index 977bd52..8591649 100755 --- a/tests/cvxopt_integer_test.py +++ b/tests/cvxopt_integer_test.py @@ -1,94 +1,99 @@ import numpy as np import cvxopt import torch -from cvxopt import glpk,solvers +from cvxopt import glpk, solvers from stpy.continuous_processes.gauss_procc import GaussianProcess import matplotlib.pyplot as plt + N = 128 d = 30 # Rotation -theta = np.radians(45.) +theta = np.radians(45.0) thetainv = np.pi - theta c, s = np.cos(theta), np.sin(theta) Q = torch.from_numpy(np.array(((c, -s), (s, c)))) -M = torch.randn(size = (d,d), dtype = torch.float64) -[Q,R] = torch.qr(M) - - -def solve(Q,c,n = 10, verbose = True, up = None, low = None): - print ("Starting Acq. 
Fucn solver...") - print ("Resolution: ", n) - - # Grid - - tau = torch.from_numpy(np.arange(-n,n+1,1).astype(np.double))/n - s = torch.ones(2*n+1) - Tau = torch.zeros(size = (d,d*(2*n+1)), dtype = torch.float64) - S = torch.zeros(size = (d,d*(2*n+1)), dtype = torch.float64) - for j in range(d): - Tau[j,j*(2*n+1):(j+1)*(2*n+1)] = tau - S[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = s - - B = Q @ Tau - - if (up is not None) or (low is not None): - G = torch.cat((B, -B, S, -S, torch.t(c),-torch.t(c))) - h = torch.ones(4 * d + 2) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - h[4 * d ] = up - h[4 * d + 1] = -low - else: - G = torch.cat((B, -B, S, -S)) - h = torch.ones(4 * d) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - # Indicator variables - x = torch.zeros(size = (d*(2*n+1),1),dtype = torch.float64) - print (h) - cc = cvxopt.matrix(c.view(-1).numpy().astype(np.double)) - Gc = cvxopt.matrix(G.numpy().astype(np.double)) - hc = cvxopt.matrix(h.numpy().astype(np.double)) - - glpk.options['it_lim'] = 10 - - solvers.solve(solver=cp.CBC) - (status, x)= glpk.ilp(cc,Gc,hc,B=set(range(d*(2*n+1))) ) - - return x +M = torch.randn(size=(d, d), dtype=torch.float64) +[Q, R] = torch.qr(M) + + +def solve(Q, c, n=10, verbose=True, up=None, low=None): + print("Starting Acq. Fucn solver...") + print("Resolution: ", n) + + # Grid + + tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n + s = torch.ones(2 * n + 1) + Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) + S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) + for j in range(d): + Tau[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = tau + S[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = s + + B = Q @ Tau + + if (up is not None) or (low is not None): + G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c))) + h = torch.ones(4 * d + 2) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + h[4 * d] = up + h[4 * d + 1] = -low + else: + G = torch.cat((B, -B, S, -S)) + h = torch.ones(4 * d) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + # Indicator variables + x = torch.zeros(size=(d * (2 * n + 1), 1), dtype=torch.float64) + print(h) + cc = cvxopt.matrix(c.view(-1).numpy().astype(np.double)) + Gc = cvxopt.matrix(G.numpy().astype(np.double)) + hc = cvxopt.matrix(h.numpy().astype(np.double)) + + glpk.options["it_lim"] = 10 + + solvers.solve(solver=cp.CBC) + (status, x) = glpk.ilp(cc, Gc, hc, B=set(range(d * (2 * n + 1)))) + + return x + # def N is the desired resolution -tau = torch.from_numpy(np.arange(-N,N+1,1).astype(np.double))/N -gp = GaussianProcess(gamma = 0.5, s = 0.001) -c = torch.randn(size = (d*(2*N+1),1), dtype = torch.float64) +tau = torch.from_numpy(np.arange(-N, N + 1, 1).astype(np.double)) / N +gp = GaussianProcess(gamma=0.5, s=0.001) +c = torch.randn(size=(d * (2 * N + 1), 1), dtype=torch.float64) for i in range(d): - plt.plot(gp.sample(tau.view(-1,1)).numpy()) - c[i*(2*N+1):(i+1)*(2*N+1)] = gp.sample(tau.view(-1,1)) + plt.plot(gp.sample(tau.view(-1, 1)).numpy()) + c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)] = gp.sample(tau.view(-1, 1)) plt.show() -def select(c,N,n, low, up): - plt.subplot(211) - cs = torch.randn(size = (d*(2*n+1),1), dtype = torch.float64) - step = N//n - plt.plot(c.numpy()) - for i in range(d): - for j in range(2*n+1): - cs[i*(2*n+1)+j] = c[i*(2*N+1)+(j*step)] - plt.plot(i*(2*N+1)+(j*step),cs[i*(2*n+1)+j].numpy(),"ro") - - sum_c = c[0*(2*N+1):(0+1)*(2*N+1)] *0 - for i in range(d): - sum_c = sum_c+ c[i*(2*N+1):(i+1)*(2*N+1)] - if low is not None: - plt.subplot(2, 1, 2) - plt.plot(sum_c.numpy()) - 
plt.plot(sum_c.numpy()*0+low,"--", label = "low") - plt.plot(sum_c.numpy() * 0 + up, "--", label = "up") - plt.legend() - - plt.show() - return cs + +def select(c, N, n, low, up): + plt.subplot(211) + cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64) + step = N // n + plt.plot(c.numpy()) + for i in range(d): + for j in range(2 * n + 1): + cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)] + plt.plot( + i * (2 * N + 1) + (j * step), cs[i * (2 * n + 1) + j].numpy(), "ro" + ) + + sum_c = c[0 * (2 * N + 1) : (0 + 1) * (2 * N + 1)] * 0 + for i in range(d): + sum_c = sum_c + c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)] + if low is not None: + plt.subplot(2, 1, 2) + plt.plot(sum_c.numpy()) + plt.plot(sum_c.numpy() * 0 + low, "--", label="low") + plt.plot(sum_c.numpy() * 0 + up, "--", label="up") + plt.legend() + + plt.show() + return cs up = None @@ -98,13 +103,13 @@ def select(c,N,n, low, up): x = solve(Q, c, n=N, up=up, low=low) for j in range(int(np.log2(N))): - n = np.power(2,j) + n = np.power(2, j) - print(N, n) - cs = select(c,N,n, low,up ) - x = solve(Q,cs,n = n, up=up, low = low) - up = float(torch.mm(torch.t(cs),torch.from_numpy(np.array(x)))) - low = float(torch.mm(torch.t(cs),torch.from_numpy(np.array(x)))) - L/n + print(N, n) + cs = select(c, N, n, low, up) + x = solve(Q, cs, n=n, up=up, low=low) + up = float(torch.mm(torch.t(cs), torch.from_numpy(np.array(x)))) + low = float(torch.mm(torch.t(cs), torch.from_numpy(np.array(x)))) - L / n """ m_value = 0 @@ -133,4 +138,4 @@ def select(c,N,n, low, up): #print (res['x']) #print (x) -""" \ No newline at end of file +""" diff --git a/tests/cvxpy_integer_test.py b/tests/cvxpy_integer_test.py index 083d921..6048abf 100755 --- a/tests/cvxpy_integer_test.py +++ b/tests/cvxpy_integer_test.py @@ -8,7 +8,7 @@ d = 20 # Rotation -theta = np.radians(45.) +theta = np.radians(45.0) thetainv = np.pi - theta c, s = np.cos(theta), np.sin(theta) Q = torch.from_numpy(np.array(((c, -s), (s, c)))) @@ -16,48 +16,47 @@ [Q, R] = torch.qr(M) -def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm = None): - if verbose == True: - print("Starting Acq. Fucn solver...") - print("Resolution: ", n) - # Grid - - tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n - s = torch.ones(2 * n + 1) - Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) - S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) - for j in range(d): - Tau[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = tau - S[j, j * (2 * n + 1):(j + 1) * (2 * n + 1)] = s - - B = Q @ Tau - - if (up is not None) or (low is not None): - G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c))) - h = torch.ones(4 * d + 2) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - h[4 * d] = up - h[4 * d + 1] = -low - else: - G = torch.cat((B, -B, S, -S)) - h = torch.ones(4 * d) - h[0:2 * d] = 1 - h[3 * d:4 * d] = -1 - # Indicator variables - - x = cp.Variable(d * (2 * n + 1), boolean=True) - if xwarm is not None: - x.value = xwarm.numpy() - c = c.view(-1).numpy() - - objective = cp.Maximize(c * x) - constraints = [0 <= x, x <= 1, G.numpy()*x <= h.view(-1).numpy()] - prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.MOSEK,verbose=verbose, warm_start=True) - - - return (x.value,Tau.numpy() @ x.value, np.dot(c,x.value)) +def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm=None): + if verbose == True: + print("Starting Acq. 
Fucn solver...") + print("Resolution: ", n) + # Grid + + tau = torch.from_numpy(np.arange(-n, n + 1, 1).astype(np.double)) / n + s = torch.ones(2 * n + 1) + Tau = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) + S = torch.zeros(size=(d, d * (2 * n + 1)), dtype=torch.float64) + for j in range(d): + Tau[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = tau + S[j, j * (2 * n + 1) : (j + 1) * (2 * n + 1)] = s + + B = Q @ Tau + + if (up is not None) or (low is not None): + G = torch.cat((B, -B, S, -S, torch.t(c), -torch.t(c))) + h = torch.ones(4 * d + 2) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + h[4 * d] = up + h[4 * d + 1] = -low + else: + G = torch.cat((B, -B, S, -S)) + h = torch.ones(4 * d) + h[0 : 2 * d] = 1 + h[3 * d : 4 * d] = -1 + # Indicator variables + + x = cp.Variable(d * (2 * n + 1), boolean=True) + if xwarm is not None: + x.value = xwarm.numpy() + c = c.view(-1).numpy() + + objective = cp.Maximize(c * x) + constraints = [0 <= x, x <= 1, G.numpy() * x <= h.view(-1).numpy()] + prob = cp.Problem(objective, constraints) + prob.solve(solver=cp.MOSEK, verbose=verbose, warm_start=True) + + return (x.value, Tau.numpy() @ x.value, np.dot(c, x.value)) # def N is the desired resolution @@ -65,58 +64,58 @@ def solve(Q, c, n=10, verbose=True, up=None, low=None, xwarm = None): gp = GaussianProcess(gamma=0.5, s=0.001) c = torch.randn(size=(d * (2 * N + 1), 1), dtype=torch.float64) for i in range(d): - z = gp.sample(tau.view(-1, 1)) - plt.plot(z.numpy()) - c[i * (2 * N + 1):(i + 1) * (2 * N + 1)] = z + z = gp.sample(tau.view(-1, 1)) + plt.plot(z.numpy()) + c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)] = z plt.show() def select(c, N, n, val): - cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64) - if val is not None: - sol = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64).view(-1)*0 - else: - sol = None - step = N // n - - for i in range(d): - #plt.plot(c[i * (2 * n + 1):(i+1) * (2 * n + 1)].numpy()) - for j in range(2 * n + 1): - cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)] - if val is not None: - if (c[i * (2 * N + 1) + (j * step)] - val[i])**2 < 10e-10: - sol[i * (2 * N + 1) + (j * step)] = 1.0 - #plt.plot((i * (2 * N + 1) + (j * step))/((i+1)*N), cs[i * (2 * n + 1) + j].numpy(), "ro") - #plt.show() - return cs,sol + cs = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64) + if val is not None: + sol = torch.randn(size=(d * (2 * n + 1), 1), dtype=torch.float64).view(-1) * 0 + else: + sol = None + step = N // n + + for i in range(d): + # plt.plot(c[i * (2 * n + 1):(i+1) * (2 * n + 1)].numpy()) + for j in range(2 * n + 1): + cs[i * (2 * n + 1) + j] = c[i * (2 * N + 1) + (j * step)] + if val is not None: + if (c[i * (2 * N + 1) + (j * step)] - val[i]) ** 2 < 10e-10: + sol[i * (2 * N + 1) + (j * step)] = 1.0 + # plt.plot((i * (2 * N + 1) + (j * step))/((i+1)*N), cs[i * (2 * n + 1) + j].numpy(), "ro") + # plt.show() + return cs, sol up = None low = None L = 10e20 -#x = solve(Q, c, n=N, up=up, low=low) +# x = solve(Q, c, n=N, up=up, low=low) sol = None val = None -for j in range(int(np.log2(N))+1): - n = np.power(2, j) +for j in range(int(np.log2(N)) + 1): + n = np.power(2, j) - print(N, n) - cs, sol = select(c, N, n, val) - x , val = solve(Q, cs, n=n, up=up, low=low, xwarm = sol) - print (x, val) - #up = float( torch.dot(cs.view(-1),torch.from_numpy(x))) - #low = float( torch.dot(cs.view(-1),torch.from_numpy(x))) - L/n - sol = x + print(N, n) + cs, sol = select(c, N, n, val) + x, val = solve(Q, cs, n=n, up=up, low=low, xwarm=sol) + print(x, val) 
+ # up = float( torch.dot(cs.view(-1),torch.from_numpy(x))) + # low = float( torch.dot(cs.view(-1),torch.from_numpy(x))) - L/n + sol = x plt.figure() -colors = ['b','k','r','g','y'] +colors = ["b", "k", "r", "g", "y"] for i in range(d): - z = c[i * (2 * N + 1):(i + 1) * (2 * N + 1)].view(-1).numpy() - x = np.linspace(-1,1,2*N+1) - plt.plot(x,z, color = colors[i % 5], label = str(i)) - index = np.argmin(z) - plt.plot(val[i],z[index],'o', color = colors[i % 5],label = str(i), ms = 10) -#plt.legend() -plt.show() \ No newline at end of file + z = c[i * (2 * N + 1) : (i + 1) * (2 * N + 1)].view(-1).numpy() + x = np.linspace(-1, 1, 2 * N + 1) + plt.plot(x, z, color=colors[i % 5], label=str(i)) + index = np.argmin(z) + plt.plot(val[i], z[index], "o", color=colors[i % 5], label=str(i), ms=10) +# plt.legend() +plt.show() diff --git a/tests/dpps_tests.py b/tests/dpps_tests.py index 64689ae..ceb5444 100644 --- a/tests/dpps_tests.py +++ b/tests/dpps_tests.py @@ -1,7 +1,7 @@ -from stpy.helpers.helper import select_subset,select_subset_inv +from stpy.helpers.helper import select_subset, select_subset_inv import numpy as np if __name__ == "__main__": - n = 50 - M = np.random.randn(n,n) + n = 50 + M = np.random.randn(n, n) diff --git a/tests/embedding/faber_schauder_embedding.py b/tests/embedding/faber_schauder_embedding.py index 7fb9fb3..7ebd1d5 100644 --- a/tests/embedding/faber_schauder_embedding.py +++ b/tests/embedding/faber_schauder_embedding.py @@ -10,15 +10,15 @@ from stpy.helpers.helper import interval m = 16 -B4 = FaberSchauderEmbedding(m = m, d = 1) +B4 = FaberSchauderEmbedding(m=m, d=1) -plt.figure(figsize = (20,20)) -basis = lambda x,j: B4.basis_fun(x,j) -x = torch.from_numpy(np.linspace(-1,1,1024)).view(-1,1) -print (B4.hierarchical_mask()) +plt.figure(figsize=(20, 20)) +basis = lambda x, j: B4.basis_fun(x, j) +x = torch.from_numpy(np.linspace(-1, 1, 1024)).view(-1, 1) +print(B4.hierarchical_mask()) for j in range(m): - plt.plot(x,basis(x,j), lw = 6) - plt.grid(ls = '--', lw = 4) - plt.xlim((-1,1)) + plt.plot(x, basis(x, j), lw=6) + plt.grid(ls="--", lw=4) + plt.xlim((-1, 1)) plt.show() diff --git a/tests/fourier-features-multidimensional.py b/tests/fourier-features-multidimensional.py index b1cfdd3..a6bc381 100644 --- a/tests/fourier-features-multidimensional.py +++ b/tests/fourier-features-multidimensional.py @@ -2,25 +2,27 @@ from stpy.continuous_processes.fourier_fea import GaussianProcessFF from stpy.embeddings.embedding import QuadratureEmbedding from stpy.helpers.helper import interval -if __name__ == "__main__": - m = 128 +if __name__ == "__main__": - def cost_function(): - gamma = 0.2 - torch.manual_seed(245) - z2 = QuadratureEmbedding(gamma=gamma, m=m, d=2) - theta2d = torch.randn(m, 1).double() - F = lambda x: z2.embed_one(x[1, 0:2].view(1,-1)) @ theta2d - print (torch.norm(theta2d)) - return F + m = 128 - F = cost_function() - xtest = torch.from_numpy(interval(50,2)) - ytest = F(xtest) + def cost_function(): + gamma = 0.2 + torch.manual_seed(245) + z2 = QuadratureEmbedding(gamma=gamma, m=m, d=2) + theta2d = torch.randn(m, 1).double() + F = lambda x: z2.embed_one(x[1, 0:2].view(1, -1)) @ theta2d + print(torch.norm(theta2d)) + return F - GP = GaussianProcessFF(d = 2, groups=[[0,1]], m = torch.Tensor([m,64]), gamma = torch.Tensor([0.2])) - GP.fit_gp(xtest,ytest) + F = cost_function() + xtest = torch.from_numpy(interval(50, 2)) + ytest = F(xtest) - GP.visualize_contour(xtest,f_true=F) + GP = GaussianProcessFF( + d=2, groups=[[0, 1]], m=torch.Tensor([m, 64]), gamma=torch.Tensor([0.2]) 
+ ) + GP.fit_gp(xtest, ytest) + GP.visualize_contour(xtest, f_true=F) diff --git a/tests/gibbs_kernel.py b/tests/gibbs_kernel.py index 0e8cf49..c647570 100644 --- a/tests/gibbs_kernel.py +++ b/tests/gibbs_kernel.py @@ -1,38 +1,40 @@ from stpy.kernels import KernelFunction -from stpy.continuous_processes.gauss_procc import GaussianProcess +from stpy.continuous_processes.gauss_procc import GaussianProcess from stpy.helpers.helper import interval from stpy.embeddings.optimal_positive_basis import OptimalPositiveBasis import matplotlib.pyplot as plt import torch import numpy as np + n = 1024 d = 1 def gamma(x): - out = x[:,0].view(-1,1)*0 - small = x <= - 0.5 - mid = torch.logical_and(x >= -0.5,x <= 0.5) - large = x > 0.5 - gamma1 = 0.1 - gamma2 = 1. - out[small] = (gamma2-gamma1)/(torch.exp(-25*(x[small]+0.5)) + 1) + gamma1 - out[mid] = gamma2 - out[large] = (gamma2-gamma1)/(torch.exp(-25*(-x[large]+0.5)) + 1) + gamma1 - return out - -gamma = lambda x: x[:,0].view(-1,1)*0 + 0.05 + 0.3*(x+1)**4 - -#gamma = lambda x: x[x<-0.5]*0 +0 + 0.05 + 0.2*(x+1)**2#*torch.abs(torch.cos(x*np.pi)) + 0.5 -xtest = torch.from_numpy(interval(n,d)) - -vals = gamma(xtest).T**2 + gamma(xtest)**2 + out = x[:, 0].view(-1, 1) * 0 + small = x <= -0.5 + mid = torch.logical_and(x >= -0.5, x <= 0.5) + large = x > 0.5 + gamma1 = 0.1 + gamma2 = 1.0 + out[small] = (gamma2 - gamma1) / (torch.exp(-25 * (x[small] + 0.5)) + 1) + gamma1 + out[mid] = gamma2 + out[large] = (gamma2 - gamma1) / (torch.exp(-25 * (-x[large] + 0.5)) + 1) + gamma1 + return out + + +gamma = lambda x: x[:, 0].view(-1, 1) * 0 + 0.05 + 0.3 * (x + 1) ** 4 + +# gamma = lambda x: x[x<-0.5]*0 +0 + 0.05 + 0.2*(x+1)**2#*torch.abs(torch.cos(x*np.pi)) + 0.5 +xtest = torch.from_numpy(interval(n, d)) + +vals = gamma(xtest).T ** 2 + gamma(xtest) ** 2 plt.imshow(vals) plt.colorbar() plt.show() -k = KernelFunction(kernel_name="gibbs", params={'gamma_fun':gamma}) -plt.imshow(k.kernel(xtest,xtest)) +k = KernelFunction(kernel_name="gibbs", params={"gamma_fun": gamma}) +plt.imshow(k.kernel(xtest, xtest)) plt.colorbar() plt.show() @@ -45,13 +47,23 @@ def gamma(x): s = 0.01 b = 0 -Emb = OptimalPositiveBasis(d, m, offset=0.0, s=s, b=b, discretization_size=n, B=1000., kernel_object=k, samples = 1000) +Emb = OptimalPositiveBasis( + d, + m, + offset=0.0, + s=s, + b=b, + discretization_size=n, + B=1000.0, + kernel_object=k, + samples=1000, +) for i in range(m): - f_i = Emb.basis_fun(xtest, i) ## basis function - plt.plot(xtest,f_i) + f_i = Emb.basis_fun(xtest, i) ## basis function + plt.plot(xtest, f_i) plt.show() # ytest = GP.sample(xtest) # plt.plot(xtest,ytest) -# plt.show() \ No newline at end of file +# plt.show() diff --git a/tests/gradient_confidence_test.py b/tests/gradient_confidence_test.py index 88288f7..19db4bd 100755 --- a/tests/gradient_confidence_test.py +++ b/tests/gradient_confidence_test.py @@ -2,39 +2,45 @@ import torch from stpy.continuous_processes.gauss_procc import GaussianProcess from stpy.helpers.helper import interval -#%matplotlib notebook + +# %matplotlib notebook # 2D Grid n = 20 n_vis = 50 d = 2 -xtest_vis = torch.from_numpy(interval(n_vis,d)) -xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1.)) +xtest_vis = torch.from_numpy(interval(n_vis, d)) +xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.0)) noise_s = 0.001 bw = 0.4 -GP_true = GaussianProcess(groups = [[0],[1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s) +GP_true = GaussianProcess( + groups=[[0], [1]], + gamma=bw * torch.ones(2, 
dtype=torch.float64), + kernel="ard", + s=noise_s, +) y = GP_true.sample(xtest) -GP_true.fit_gp(xtest,y) +GP_true.fit_gp(xtest, y) -zero = torch.from_numpy(np.array([[0.,0.]])) -gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian = True) +zero = torch.from_numpy(np.array([[0.0, 0.0]])) +gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian=True) -GP_fit = GaussianProcess(gamma = bw, kernel = "squared_exponential", s = noise_s) -GP_fit.fit_gp(xtest ,y) -#GP_fit.visualize(xtest_vis) +GP_fit = GaussianProcess(gamma=bw, kernel="squared_exponential", s=noise_s) +GP_fit.fit_gp(xtest, y) +# GP_fit.visualize(xtest_vis) GP_fit.log_marginal_likelihood_self() GP_fit.visualize_quiver(xtest_vis) -print ("Zero:" ,zero) +print("Zero:", zero) g, V = GP_fit.gradient_mean_var(zero) -print (gradient) +print(gradient) -print (V) +print(V) -print ("------------------") \ No newline at end of file +print("------------------") diff --git a/tests/gradients_test.py b/tests/gradients_test.py index 67b97b3..ef28e49 100755 --- a/tests/gradients_test.py +++ b/tests/gradients_test.py @@ -3,87 +3,100 @@ import torch from stpy.continuous_processes.gauss_procc import GaussianProcess from stpy.helpers.helper import interval -#%matplotlib notebook + +# %matplotlib notebook # 2D Grid -for n in np.arange(50,60,10): +for n in np.arange(50, 60, 10): n_vis = 50 d = 2 - xtest_vis = torch.from_numpy(interval(n_vis,d)) - xtest = torch.from_numpy(interval(n,d,L_infinity_ball=0.01)) + xtest_vis = torch.from_numpy(interval(n_vis, d)) + xtest = torch.from_numpy(interval(n, d, L_infinity_ball=0.01)) noise_s = 0.001 bw = 0.4 - GP_true = GaussianProcess(groups = [[0],[1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s) + GP_true = GaussianProcess( + groups=[[0], [1]], + gamma=bw * torch.ones(2, dtype=torch.float64), + kernel="ard", + s=noise_s, + ) y = GP_true.sample(xtest) - GP_true.fit_gp(xtest,y) - - zero = torch.from_numpy(np.array([[0.,0.]])) - gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian = True) + GP_true.fit_gp(xtest, y) - # print ("gradient:",gradient) - # print ("hessian:",hessian) + zero = torch.from_numpy(np.array([[0.0, 0.0]])) + gradient, hessian = GP_true.mean_gradient_hessian(zero, hessian=True) + # print ("gradient:",gradient) + # print ("hessian:",hessian) # [mu, _] = GP_true.get_lambdas(2, mean=True) # for z in [10e-1, 10e-2, 10e-3, 10e-4, 10e-5, 10e-6, 10e-7]: # print(z, stpy.helper.finite_differences(mu,z,xtest[0].view(1,-1))) - theta = np.radians(12.) 
+ theta = np.radians(12.0) thetainv = np.pi - theta c, s = np.cos(theta), np.sin(theta) - RandRot = torch.from_numpy(np.array(((c,-s), (s, c)))) - #print (RandRot) + RandRot = torch.from_numpy(np.array(((c, -s), (s, c)))) + # print (RandRot) def eval(x): xprime = x.mm(RandRot) f = GP_true.mean_std(xprime)[0] return f - y_prime = eval(xtest) - GP_fit = GaussianProcess(groups = [[0,1]], gamma = bw*torch.ones(2,dtype = torch.float64), kernel = "ard", s = noise_s) - GP_fit.fit_gp(xtest,y_prime) + GP_fit = GaussianProcess( + groups=[[0, 1]], + gamma=bw * torch.ones(2, dtype=torch.float64), + kernel="ard", + s=noise_s, + ) + GP_fit.fit_gp(xtest, y_prime) GP_fit.visualize(xtest_vis) GP_fit.log_marginal_likelihood_self() - print ("Zero:",zero) + print("Zero:", zero) g, V = GP_fit.gradient_mean_var(zero) - print (gradient) + print(gradient) - print (V) + print(V) - print ("------------------") - - gradient, hessian = GP_fit.mean_gradient_hessian(zero, hessian = True) - Q = torch.symeig(hessian, eigenvectors = True)[1] + print("------------------") + gradient, hessian = GP_fit.mean_gradient_hessian(zero, hessian=True) + Q = torch.symeig(hessian, eigenvectors=True)[1] print(GP_fit.mean_std(zero)) - #print ("Estimated:",Q) - #print ("True:", RandRot) + # print ("Estimated:",Q) + # print ("True:", RandRot) P = torch.t(Q) @ RandRot - I = torch.eye(GP_fit.d, dtype = torch.float64) - Noise = s*I*s + I = torch.eye(GP_fit.d, dtype=torch.float64) + Noise = s * I * s Perm = torch.clamp(torch.abs(P), min=10e-3) - print (n, P,torch.norm(torch.abs(P)-Perm)) - + print(n, P, torch.norm(torch.abs(P) - Perm)) no = 100 - thetas = np.linspace(0.,np.pi,no) + thetas = np.linspace(0.0, np.pi, no) res = [] for theta in thetas: c, s = np.cos(theta), np.sin(theta) - Rot = np.array(((c,-s), (s, c))) + Rot = np.array(((c, -s), (s, c))) Rot = torch.from_numpy(Rot) - res.append(float(GP_fit.log_marginal_likelihood(GP_fit.kernel_object.gamma,Rot,GP_fit.kernel_object.kappa))) - plt.plot(thetas,res) - plt.plot([thetainv],np.average(np.array(res)),'ro') + res.append( + float( + GP_fit.log_marginal_likelihood( + GP_fit.kernel_object.gamma, Rot, GP_fit.kernel_object.kappa + ) + ) + ) + plt.plot(thetas, res) + plt.plot([thetainv], np.average(np.array(res)), "ro") plt.show() - GP_fit.optimize_params(type = "rots", restarts = 10) + GP_fit.optimize_params(type="rots", restarts=10) GP_fit.log_marginal_likelihood_self() print(GP_fit.Rot) diff --git a/tests/hessian-estimation-test.py b/tests/hessian-estimation-test.py index af8d033..bcae5ac 100755 --- a/tests/hessian-estimation-test.py +++ b/tests/hessian-estimation-test.py @@ -10,14 +10,14 @@ L_infinity_ball = 0.5 d = 2 -thetae = np.radians(35.) 
+thetae = np.radians(35.0) ce, se = np.cos(thetae), np.sin(thetae) R = torch.from_numpy(np.array(((ce, -se), (se, ce)))) D = torch.diag(torch.Tensor([0.8, 1.1]).double()) -#D = torch.diag(torch.Tensor([1, 1]).double()) +# D = torch.diag(torch.Tensor([1, 1]).double()) W = R.T @ D @ R -print (W) +print(W) BenchmarkFunc = QuadraticBenchmark(d=d, R=W) x = BenchmarkFunc.initial_guess(N) @@ -31,27 +31,36 @@ F0 = lambda x: BenchmarkFunc.eval(x, sigma=0) -def plot_contour(xtest,ytest,lim=None): +def plot_contour(xtest, ytest, lim=None): from scipy.interpolate import griddata + xx = xtest[:, 0].numpy() yy = xtest[:, 1].numpy() - grid_x, grid_y = np.mgrid[min(xx):max(xx):100j, min(yy):max(yy):100j] - grid_z_mu = griddata((xx, yy), ytest[:, 0].numpy(), (grid_x, grid_y), method='linear') + grid_x, grid_y = np.mgrid[min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j] + grid_z_mu = griddata( + (xx, yy), ytest[:, 0].numpy(), (grid_x, grid_y), method="linear" + ) fig, ax = plt.subplots(figsize=(10, 9)) cs = ax.contourf(grid_x, grid_y, grid_z_mu) - ax.contour(cs, colors='k') + ax.contour(cs, colors="k") if lim is not None: - plt.xlim([-lim,lim]) - plt.ylim([-lim,lim]) + plt.xlim([-lim, lim]) + plt.ylim([-lim, lim]) plt.colorbar(cs) # Plot grid. - ax.grid(c='k', ls='-', alpha=0.1) - + ax.grid(c="k", ls="-", alpha=0.1) ## Additive Model m = 64 -GP = GaussianProcessFF(d=d, s=s, m = torch.ones(d)*m, gamma=gamma*torch.ones(d), bounds=bounds, groups = stpy.helpers.helper.full_group(d)) +GP = GaussianProcessFF( + d=d, + s=s, + m=torch.ones(d) * m, + gamma=gamma * torch.ones(d), + bounds=bounds, + groups=stpy.helpers.helper.full_group(d), +) ## Global Model # m = 512 @@ -61,23 +70,22 @@ def plot_contour(xtest,ytest,lim=None): p = 5 d = 2 -embedding = PolynomialEmbedding(d,p) +embedding = PolynomialEmbedding(d, p) Map = lambda x: embedding.embed(x) # Starting points x0_1 = torch.Tensor([0.1, 0.1]).double().view(-1, d) -#x0_1 = torch.Tensor([-0.1, 0.]).double().view(-1, d) +# x0_1 = torch.Tensor([-0.1, 0.]).double().view(-1, d) x0_2 = torch.Tensor([0.1, 0.1]).double().view(-1, d) print("Embeding size:", Map(x0_1).size()) -Bandit = OPPR_TS_GP(x0_1, F, GP, Map, finite_dim=True, s = s, GPMap = True) -#Bandit.decolerate(x0_1,10e-5,1) -Bandit.decolerate_AJD([x0_1,x0_2],10e-5,1) - -print (Bandit.Q) -print (W@Bandit.Q) -print (W@torch.inverse(Bandit.Q)) +Bandit = OPPR_TS_GP(x0_1, F, GP, Map, finite_dim=True, s=s, GPMap=True) +# Bandit.decolerate(x0_1,10e-5,1) +Bandit.decolerate_AJD([x0_1, x0_2], 10e-5, 1) +print(Bandit.Q) +print(W @ Bandit.Q) +print(W @ torch.inverse(Bandit.Q)) diff --git a/tests/interval_groups_test.py b/tests/interval_groups_test.py index 0c20a87..bb566e0 100644 --- a/tests/interval_groups_test.py +++ b/tests/interval_groups_test.py @@ -1,10 +1,15 @@ -from stpy.helpers.helper import interval_groups, get_hierarchy, hierarchical_distance, valid_enlargement +from stpy.helpers.helper import ( + interval_groups, + get_hierarchy, + hierarchical_distance, + valid_enlargement, +) if __name__ == "__main__": - out = get_hierarchy(start = 0,new_elements=[1,2,3]) - curr = [[0], [1], [2], [3]] - print(hierarchical_distance(curr, [[0,1],[2],[3]])) - enlargements = valid_enlargement(curr, out) - for enlargement in enlargements: - print (curr,"->",out[enlargement]) \ No newline at end of file + out = get_hierarchy(start=0, new_elements=[1, 2, 3]) + curr = [[0], [1], [2], [3]] + print(hierarchical_distance(curr, [[0, 1], [2], [3]])) + enlargements = valid_enlargement(curr, out) + for enlargement in enlargements: + 
print(curr, "->", out[enlargement]) diff --git a/tests/kernelized-features-test.py b/tests/kernelized-features-test.py index 3da0af7..e63d65e 100644 --- a/tests/kernelized-features-test.py +++ b/tests/kernelized-features-test.py @@ -5,27 +5,27 @@ import numpy as np if __name__ == "__main__": - m = 16 - gamma = 1. - s = 0.0001 - n = 40 + m = 16 + gamma = 1.0 + s = 0.0001 + n = 40 - embedding = HermiteEmbedding(m = m, gamma = gamma) - GP = KernelizedFeatures(embedding=embedding,s = s,m = m) + embedding = HermiteEmbedding(m=m, gamma=gamma) + GP = KernelizedFeatures(embedding=embedding, s=s, m=m) - x = torch.from_numpy(interval(n,1)) - xtest = torch.from_numpy(interval(2048,1)) - F = lambda x: torch.sin(10*x) - y = F(x) + x = torch.from_numpy(interval(n, 1)) + xtest = torch.from_numpy(interval(2048, 1)) + F = lambda x: torch.sin(10 * x) + y = F(x) - GP.fit_gp(x,y) - mu, std = GP.mean_std(xtest) - print (mu.size()) - print (std.size()) - GP.visualize(xtest) + GP.fit_gp(x, y) + mu, std = GP.mean_std(xtest) + print(mu.size()) + print(std.size()) + GP.visualize(xtest) - for _ in range(30): - x = torch.from_numpy(np.random.uniform(-1,1,1)).view(1,1) - GP.add_data_point(x,F(x)) + for _ in range(30): + x = torch.from_numpy(np.random.uniform(-1, 1, 1)).view(1, 1) + GP.add_data_point(x, F(x)) - GP.visualize(xtest) + GP.visualize(xtest) diff --git a/tests/kernels/ard_matern_kernel_test.py b/tests/kernels/ard_matern_kernel_test.py index 56379b3..fd4b05b 100644 --- a/tests/kernels/ard_matern_kernel_test.py +++ b/tests/kernels/ard_matern_kernel_test.py @@ -10,14 +10,20 @@ d = 2 eps = 0.01 s = 1 -x = torch.rand(N,d).double()*2 - 1 -xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1)) +x = torch.rand(N, d).double() * 2 - 1 +xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1)) # true GP = GaussianProcess(kernel_name="ard_matern", d=d) y = GP.sample(x) -GP.fit_gp(x,y) -GP.optimize_params(type="bandwidth", restarts=5, verbose = False, optimizer = 'pytorch-minimize', scale = 1., weight=1.) 
+GP.fit_gp(x, y) +GP.optimize_params( + type="bandwidth", + restarts=5, + verbose=False, + optimizer="pytorch-minimize", + scale=1.0, + weight=1.0, +) GP.visualize_contour(xtest) # - diff --git a/tests/marginalized_likelihood_test.py b/tests/marginalized_likelihood_test.py index 8dc1a6c..c1551c7 100644 --- a/tests/marginalized_likelihood_test.py +++ b/tests/marginalized_likelihood_test.py @@ -7,102 +7,98 @@ from pymanopt.manifolds import Euclidean if __name__ == "__main__": - d = 2 - n = 3 - - - ## Squared exponential with single parameter - GP = GaussianProcess(gamma=1., kernel_name="ard", d=2) - x = torch.rand(n,d).double()*2 - 1 - y = GP.sample(x) - GP.fit_gp(x,y) - xtest = torch.from_numpy(interval(50,2,L_infinity_ball=1)) - - # - # init_val = None - # manifold = Euclidean(2) - # bounds = None - # - # params = {"0":{"kappa":(1.,Euclidean(1),None),"ard_gamma":(init_val, manifold, bounds)}} - #GP.optimize_params_general(params = params, maxiter = 100) - - #GP.optimize_params(type = "bandwidth", restarts=2) - - -# - ## Additive quick - k = KernelFunction(kernel_name = "ard", d = 2, groups = [[0],[1]] ) - GP = GaussianProcess(kernel=k) - x = torch.rand(n,d).double()*2 - 1 - y = GP.sample(x) - GP.fit_gp(x,y) - - #GP.optimize_params(type="bandwidth", restarts=2) - - - - - # ## Additive via algebra - k1 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.1, d = 1, group=[0]) - k2 = KernelFunction(kernel_name="polynomial" ,ard_gamma = 0.5, power = 2, d = 1, group=[1]) - k = k1 + k2 - # - # print (k.params_dict) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - #GP.optimize_params(type="bandwidth", restarts=2) - - - ## Additive two the same - k1 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.1, d = 1, group=[0]) - k2 = KernelFunction(kernel_name="ard" ,ard_gamma = 0.5, power = 2, d = 1, group=[1]) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - #GP.optimize_params(type="bandwidth", restarts=2) - - - ## Optimize groups - k = KernelFunction(kernel_name="ard", d=2, groups = [[0,1]]) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - #print(k.params_dict) - #GP.optimize_params(type="groups", restarts=2) - - ## Optimize power in polynomial kernel - k = KernelFunction(kernel_name="polynomial", d=2, power = 3) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - #print(k.params_dict) - params = {"0":{"power":(1.,[1,2,3,4,5],None)}} - #GP.optimize_params_general(params = params, optimizer="discrete") - - - ## Covar - k = KernelFunction(kernel_name="full_covariance_se", d=2) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - #GP.optimize_params(type="covariance", restarts=2) - - ## cova with regularizer - k = KernelFunction(kernel_name="full_covariance_se", d=2) - GP = GaussianProcess(kernel=k, d=2) - # - x = torch.rand(n, d).double() * 2 - 1 - y = GP.sample(x) - GP.fit_gp(x, y) - GP.optimize_params(type="covariance", restarts=2, regularizer=["spectral_norm",0.1]) \ No newline at end of file + d = 2 + n = 3 + + ## Squared exponential with single parameter + GP = GaussianProcess(gamma=1.0, kernel_name="ard", d=2) + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + xtest = torch.from_numpy(interval(50, 2, L_infinity_ball=1)) + + # + 
# init_val = None + # manifold = Euclidean(2) + # bounds = None + # + # params = {"0":{"kappa":(1.,Euclidean(1),None),"ard_gamma":(init_val, manifold, bounds)}} + # GP.optimize_params_general(params = params, maxiter = 100) + + # GP.optimize_params(type = "bandwidth", restarts=2) + + # + ## Additive quick + k = KernelFunction(kernel_name="ard", d=2, groups=[[0], [1]]) + GP = GaussianProcess(kernel=k) + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + + # GP.optimize_params(type="bandwidth", restarts=2) + + # ## Additive via algebra + k1 = KernelFunction(kernel_name="ard", ard_gamma=0.1, d=1, group=[0]) + k2 = KernelFunction( + kernel_name="polynomial", ard_gamma=0.5, power=2, d=1, group=[1] + ) + k = k1 + k2 + # + # print (k.params_dict) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + # GP.optimize_params(type="bandwidth", restarts=2) + + ## Additive two the same + k1 = KernelFunction(kernel_name="ard", ard_gamma=0.1, d=1, group=[0]) + k2 = KernelFunction(kernel_name="ard", ard_gamma=0.5, power=2, d=1, group=[1]) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + # GP.optimize_params(type="bandwidth", restarts=2) + + ## Optimize groups + k = KernelFunction(kernel_name="ard", d=2, groups=[[0, 1]]) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + # print(k.params_dict) + # GP.optimize_params(type="groups", restarts=2) + + ## Optimize power in polynomial kernel + k = KernelFunction(kernel_name="polynomial", d=2, power=3) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + # print(k.params_dict) + params = {"0": {"power": (1.0, [1, 2, 3, 4, 5], None)}} + # GP.optimize_params_general(params = params, optimizer="discrete") + + ## Covar + k = KernelFunction(kernel_name="full_covariance_se", d=2) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + # GP.optimize_params(type="covariance", restarts=2) + + ## cova with regularizer + k = KernelFunction(kernel_name="full_covariance_se", d=2) + GP = GaussianProcess(kernel=k, d=2) + # + x = torch.rand(n, d).double() * 2 - 1 + y = GP.sample(x) + GP.fit_gp(x, y) + GP.optimize_params( + type="covariance", restarts=2, regularizer=["spectral_norm", 0.1] + ) diff --git a/tests/orthogonal_map_test.py b/tests/orthogonal_map_test.py index d173cc0..4be4589 100755 --- a/tests/orthogonal_map_test.py +++ b/tests/orthogonal_map_test.py @@ -5,45 +5,47 @@ if __name__ == "__main__": + dim = 4 + Benchmark = ProteinBenchmark( + "/home/mojko/Documents/PhD/stpy/stpy/test_functions/protein_data_gb1.h5", + dim=dim, + ref=["A", "B", "C", "D"], + ) + Benchmark.self_translate() - dim = 4 + X = Benchmark.data.values[:, 0:dim].astype(int) + Y = Benchmark.data.values[:, 5].astype(float).reshape(-1, 1) - Benchmark = ProteinBenchmark("/home/mojko/Documents/PhD/stpy/stpy/test_functions/protein_data_gb1.h5", dim=dim, ref=['A', 'B', 'C', 'D']) - Benchmark.self_translate() + X_one_hot = Benchmark.translate_one_hot(X) - X = Benchmark.data.values[:,0:dim].astype(int) - Y = Benchmark.data.values[:,5].astype(float).reshape(-1,1) + X_train, X_test, y_train, y_test = train_test_split( + X_one_hot, Y, test_size=0.20, random_state=42 + ) + X_train = torch.from_numpy(X_train) + X_test = torch.from_numpy(X_test) + y_train = 
torch.from_numpy(y_train) + y_test = torch.from_numpy(y_test) - X_one_hot = Benchmark.translate_one_hot(X) + print(X_train.size()) + print(y_train.size()) - X_train, X_test, y_train, y_test = train_test_split(X_one_hot, Y, test_size = 0.20, random_state = 42) + print(X_test.size()) + print(y_test.size()) - X_train = torch.from_numpy(X_train) - X_test = torch.from_numpy(X_test) - y_train = torch.from_numpy(y_train) - y_test = torch.from_numpy(y_test) + d = dim * 26 + m = dim * 26 - print(X_train.size()) - print(y_train.size()) + ridge = lambda x: torch.relu(x) + Net = RandomMap(d, m, ridge, output=1) + print("Loss before training: ", Net.loss(X_test, y_test)) - print(X_test.size()) - print(y_test.size()) + Net.fit_map(X_train, y_train, verbose=1, lr=10e-1, epochs=100) - d = dim*26 - m = dim*26 + print("Net:", Net.forward(X_test[1, :].view(1, -1))) - ridge = lambda x: torch.relu(x) - Net = RandomMap(d,m,ridge, output = 1) + print("Truth:", y_test[1, :]) - print ("Loss before training: ",Net.loss(X_test,y_test)) - - Net.fit_map(X_train,y_train, verbose=1, lr = 10e-1, epochs = 100) - - print ("Net:",Net.forward(X_test[1,:].view(1,-1))) - - print ("Truth:",y_test[1,:]) - - print (Net.loss(X_test,y_test)) \ No newline at end of file + print(Net.loss(X_test, y_test)) diff --git a/tests/regularization_basis.py b/tests/regularization_basis.py index 8d4cea1..2ccc6b9 100644 --- a/tests/regularization_basis.py +++ b/tests/regularization_basis.py @@ -6,67 +6,83 @@ from stpy.helpers.helper import interval import matplotlib.pyplot as plt from stpy.kernels import KernelFunction -from stpy.embeddings.bernstein_embedding import BernsteinEmbedding, BernsteinSplinesEmbedding, BernsteinSplinesOverlapping -from stpy.embeddings.bump_bases import TriangleEmbedding,PositiveNystromEmbeddingBump +from stpy.embeddings.bernstein_embedding import ( + BernsteinEmbedding, + BernsteinSplinesEmbedding, + BernsteinSplinesOverlapping, +) +from stpy.embeddings.bump_bases import TriangleEmbedding, PositiveNystromEmbeddingBump if __name__ == "__main__": - d = 1 - m = 32 - n = 256 - N = 20 + d = 1 + m = 32 + n = 256 + N = 20 - s = 0.01 - b = 0.1 - B = 0.5 + s = 0.01 + b = 0.1 + B = 0.5 - gamma = 0.1 - kernel_object = KernelFunction(gamma = gamma) - kernel_object_poly = KernelFunction(kernel_name="polynomial", power = N) + gamma = 0.1 + kernel_object = KernelFunction(gamma=gamma) + kernel_object_poly = KernelFunction(kernel_name="polynomial", power=N) - EmbBern = BernsteinEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s) - EmbSplines = BernsteinSplinesEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s) - EmbSplinesOverlap = BernsteinSplinesOverlapping(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s) - Emb = TriangleEmbedding(d,m,kernel_object=kernel_object,offset=0.5,b=b,B=B,s = s) - Embpoly = TriangleEmbedding(d,m,kernel_object=kernel_object_poly,offset=0.5,b=b,B=B,s = s) - Embnys = PositiveNystromEmbeddingBump(d, m, kernel_object=kernel_object, offset=0.5, b=0, B=1000, s = s) + EmbBern = BernsteinEmbedding( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + EmbSplines = BernsteinSplinesEmbedding( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + EmbSplinesOverlap = BernsteinSplinesOverlapping( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + Emb = TriangleEmbedding( + d, m, kernel_object=kernel_object, offset=0.5, b=b, B=B, s=s + ) + Embpoly = TriangleEmbedding( + d, m, kernel_object=kernel_object_poly, offset=0.5, b=b, B=B, s=s 
+ ) + Embnys = PositiveNystromEmbeddingBump( + d, m, kernel_object=kernel_object, offset=0.5, b=0, B=1000, s=s + ) - GP = GaussianProcess(d = d, s = s, kernel=kernel_object) + GP = GaussianProcess(d=d, s=s, kernel=kernel_object) - xtest = torch.from_numpy(interval(n,d,L_infinity_ball=1.1)) - x = torch.from_numpy(np.random.uniform(-1,1,N)).view(-1,1) + xtest = torch.from_numpy(interval(n, d, L_infinity_ball=1.1)) + x = torch.from_numpy(np.random.uniform(-1, 1, N)).view(-1, 1) - F_true = lambda x: torch.sin(5*x)**2-0.1 - F = lambda x: F_true(x) + s*torch.randn(x.size()[0]).view(-1,1).double() - y = F(x) + F_true = lambda x: torch.sin(5 * x) ** 2 - 0.1 + F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() + y = F(x) - Emb.fit(x, y) - EmbBern.fit(x, y) - Embpoly.fit(x, y) - EmbSplines.fit(x, y) - EmbSplinesOverlap.fit(x, y) - Embnys.fit(x, y) - GP.fit_gp(x,y) + Emb.fit(x, y) + EmbBern.fit(x, y) + Embpoly.fit(x, y) + EmbSplines.fit(x, y) + EmbSplinesOverlap.fit(x, y) + Embnys.fit(x, y) + GP.fit_gp(x, y) - mu = Emb.mean_std(xtest) - mu_spline = EmbSplines.mean_std(xtest) - mu_spline_overlap = EmbSplinesOverlap.mean_std(xtest) - mu_true,_ = GP.mean_std(xtest) - mu_bern = EmbBern.mean_std(xtest) - mu_poly = Embpoly.mean_std(xtest) - mu_pos = Embnys.mean_std(xtest) + mu = Emb.mean_std(xtest) + mu_spline = EmbSplines.mean_std(xtest) + mu_spline_overlap = EmbSplinesOverlap.mean_std(xtest) + mu_true, _ = GP.mean_std(xtest) + mu_bern = EmbBern.mean_std(xtest) + mu_poly = Embpoly.mean_std(xtest) + mu_pos = Embnys.mean_std(xtest) - plt.plot(xtest, xtest*0+b, 'k--') - plt.plot(xtest, xtest * 0 + B, 'k--') + plt.plot(xtest, xtest * 0 + b, "k--") + plt.plot(xtest, xtest * 0 + B, "k--") - plt.plot(xtest,F_true(xtest),'r', label = 'true') - plt.plot(xtest,mu_true,'b--', label = 'no-constraints') - plt.plot(xtest,mu_pos) - plt.plot(x,y,'ro') - plt.plot(xtest, mu, 'g-x', label = 'Triangles') - #plt.plot(xtest, mu_bern, 'y-o',label = 'Bernstein basis') - #plt.plot(xtest, mu_poly, color = 'orange', label='triangles polynomial kernel') - #plt.plot(xtest, mu_spline, color='purple', label='splines') - #plt.plot(xtest, mu_spline_overlap, color='brown', label='splines_overlap') - plt.legend() - plt.show() \ No newline at end of file + plt.plot(xtest, F_true(xtest), "r", label="true") + plt.plot(xtest, mu_true, "b--", label="no-constraints") + plt.plot(xtest, mu_pos) + plt.plot(x, y, "ro") + plt.plot(xtest, mu, "g-x", label="Triangles") + # plt.plot(xtest, mu_bern, 'y-o',label = 'Bernstein basis') + # plt.plot(xtest, mu_poly, color = 'orange', label='triangles polynomial kernel') + # plt.plot(xtest, mu_spline, color='purple', label='splines') + # plt.plot(xtest, mu_spline_overlap, color='brown', label='splines_overlap') + plt.legend() + plt.show() diff --git a/tests/spike-basis-general.py b/tests/spike-basis-general.py index 51bc283..fff3113 100644 --- a/tests/spike-basis-general.py +++ b/tests/spike-basis-general.py @@ -4,21 +4,22 @@ import torch import matplotlib.pyplot as plt from stpy.borel_set import BorelSet + if __name__ == "__main__": - d = 1 - m = 100 - S = BorelSet(1,[-1,1]) + d = 1 + m = 100 + S = BorelSet(1, [-1, 1]) - embed_p = FaberSchauderEmbedding(d=d, m=p) - print (torch.sum(embed_p.integral(S))) + embed_p = FaberSchauderEmbedding(d=d, m=p) + print(torch.sum(embed_p.integral(S))) - m = embed_p.size - GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) - F = lambda x: torch.sin(x) - x = torch.from_numpy(interval(2,d)) - xtest = torch.from_numpy(interval(1024, d)) - GP.fit_gp(x, 
F(x)) - GP.visualize(xtest, f_true=F, show = False) - for j in range(p): - plt.plot(xtest,embed_p.basis_fun(xtest,j+1)) - plt.show() \ No newline at end of file + m = embed_p.size + GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) + F = lambda x: torch.sin(x) + x = torch.from_numpy(interval(2, d)) + xtest = torch.from_numpy(interval(1024, d)) + GP.fit_gp(x, F(x)) + GP.visualize(xtest, f_true=F, show=False) + for j in range(p): + plt.plot(xtest, embed_p.basis_fun(xtest, j + 1)) + plt.show() diff --git a/tests/test-absolute-deviation.py b/tests/test-absolute-deviation.py index f3ec4ea..db2c30e 100644 --- a/tests/test-absolute-deviation.py +++ b/tests/test-absolute-deviation.py @@ -5,25 +5,25 @@ import matplotlib.pyplot as plt if __name__ == "__main__": - d = 1 - p = 4 - embed_p = ChebyschevEmbedding(d=d, p=p) - m = embed_p.size - GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) + d = 1 + p = 4 + embed_p = ChebyschevEmbedding(d=d, p=p) + m = embed_p.size + GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) - x = torch.from_numpy(interval(10,d)) - xtest = torch.from_numpy(interval(1024, d)) - GP.fit_gp(x, x**4) + x = torch.from_numpy(interval(10, d)) + xtest = torch.from_numpy(interval(1024, d)) + GP.fit_gp(x, x**4) - mu = GP.mean_aboslute_deviation(xtest, B = None) - mu2 = GP.mean_aboslute_deviation(xtest, B = 0.1) - mu3 = GP.mean_std(xtest)[0] - mu4 = GP.mean_constrained(xtest, B = 0.1) - #GP.visualize(xtest, show = False) + mu = GP.mean_aboslute_deviation(xtest, B=None) + mu2 = GP.mean_aboslute_deviation(xtest, B=0.1) + mu3 = GP.mean_std(xtest)[0] + mu4 = GP.mean_constrained(xtest, B=0.1) + # GP.visualize(xtest, show = False) - plt.plot(xtest,mu, "--",label = 'l1 unconstrained', alpha = 0.5) - plt.plot(xtest, mu2,"--",label = 'l1 constrained', alpha = 0.5) - plt.plot(xtest, mu3, label = 'l2 unconstrained', alpha = 0.5) - plt.plot(xtest, mu4,label = 'l2 constrained', alpha = 0.5) - plt.legend() - plt.show() \ No newline at end of file + plt.plot(xtest, mu, "--", label="l1 unconstrained", alpha=0.5) + plt.plot(xtest, mu2, "--", label="l1 constrained", alpha=0.5) + plt.plot(xtest, mu3, label="l2 unconstrained", alpha=0.5) + plt.plot(xtest, mu4, label="l2 constrained", alpha=0.5) + plt.legend() + plt.show() diff --git a/tests/test-positive-basis.py b/tests/test-positive-basis.py index 71634ba..d07d575 100644 --- a/tests/test-positive-basis.py +++ b/tests/test-positive-basis.py @@ -9,16 +9,16 @@ from stpy.helpers.helper import interval m = 32 -kernel = KernelFunction(gamma = 0.1,kernel_name="squared_exponential", power = 5) -B4 = PositiveNystromEmbeddingBump(kernel_object=kernel, m = m, d = 1, samples = 100) +kernel = KernelFunction(gamma=0.1, kernel_name="squared_exponential", power=5) +B4 = PositiveNystromEmbeddingBump(kernel_object=kernel, m=m, d=1, samples=100) -plt.figure(figsize = (20,20)) -basis = lambda x,j: B4.basis_fun(x,j) -x = torch.from_numpy(np.linspace(-1,1,100)).view(-1,1) +plt.figure(figsize=(20, 20)) +basis = lambda x, j: B4.basis_fun(x, j) +x = torch.from_numpy(np.linspace(-1, 1, 100)).view(-1, 1) for j in range(m): - plt.plot(x,basis(x,j), lw = 6) - plt.grid(ls = '--', lw = 4) - plt.xlim((-1,1)) + plt.plot(x, basis(x, j), lw=6) + plt.grid(ls="--", lw=4) + plt.xlim((-1, 1)) plt.show() diff --git a/tests/test_functions/felsimulator_test.py b/tests/test_functions/felsimulator_test.py index 91b8cb2..e12b571 100644 --- a/tests/test_functions/felsimulator_test.py +++ b/tests/test_functions/felsimulator_test.py @@ -7,8 +7,6 @@ if __name__ == "__main__": sigma = 0.1 - xtest = 
interval_torch(30, d= 2, L_infinity_ball=0.5) - F = SwissFEL(d =2, dts = 'evaluations_bpm.hdf5') + xtest = interval_torch(30, d=2, L_infinity_ball=0.5) + F = SwissFEL(d=2, dts="evaluations_bpm.hdf5") F.Simulator.GP.visualize_contour(xtest) - - diff --git a/tests/triangle-integration-test.py b/tests/triangle-integration-test.py index 3c74e3d..591faff 100644 --- a/tests/triangle-integration-test.py +++ b/tests/triangle-integration-test.py @@ -10,42 +10,41 @@ if __name__ == "__main__": - d = 1 - m = 64 - S = BorelSet(1,[-1,1]) - - embedding = TriangleEmbedding(d=d, m=m, s = 10e-8) - - levels = 5 - hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) - basic_sets = hierarchical_structure.get_sets_level(hierarchical_structure.levels) - - xtest = hierarchical_structure.top_node.return_discretization(512) - - for set in basic_sets: - print (set.bounds, set.volume()) - x = torch.linspace(set.bounds[0, 0], set.bounds[0, 1], 2) - Gamma_half = embedding.cov() - val = torch.sum(torch.pinverse(Gamma_half)@embedding.integral(set)) - - - plt.plot(x, x * 0 + float(val)/set.volume(), '-o', color="green", lw=5) - for i in range(m): - plt.plot(xtest, embedding.basis_fun(xtest,i), 'k') - plt.show() - - plt.subplot(1,2,1) - plt.imshow(embedding.M) - plt.subplot(1,2,2) - plt.imshow(embedding.Gamma_half) - plt.show() - # m = embed_p.size - # GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) - # F = lambda x: torch.sin(x) - # x = torch.from_numpy(interval(2,d)) - # xtest = torch.from_numpy(interval(1024, d)) - # GP.fit_gp(x, F(x)) - # GP.visualize(xtest, f_true=F, show = False) - # for j in range(p): - # plt.plot(xtest,embed_p.basis_fun(xtest,j+1)) - # plt.show() \ No newline at end of file + d = 1 + m = 64 + S = BorelSet(1, [-1, 1]) + + embedding = TriangleEmbedding(d=d, m=m, s=10e-8) + + levels = 5 + hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) + basic_sets = hierarchical_structure.get_sets_level(hierarchical_structure.levels) + + xtest = hierarchical_structure.top_node.return_discretization(512) + + for set in basic_sets: + print(set.bounds, set.volume()) + x = torch.linspace(set.bounds[0, 0], set.bounds[0, 1], 2) + Gamma_half = embedding.cov() + val = torch.sum(torch.pinverse(Gamma_half) @ embedding.integral(set)) + + plt.plot(x, x * 0 + float(val) / set.volume(), "-o", color="green", lw=5) + for i in range(m): + plt.plot(xtest, embedding.basis_fun(xtest, i), "k") + plt.show() + + plt.subplot(1, 2, 1) + plt.imshow(embedding.M) + plt.subplot(1, 2, 2) + plt.imshow(embedding.Gamma_half) + plt.show() + # m = embed_p.size + # GP = KernelizedFeatures(embeding=embed_p, m=m, d=d) + # F = lambda x: torch.sin(x) + # x = torch.from_numpy(interval(2,d)) + # xtest = torch.from_numpy(interval(1024, d)) + # GP.fit_gp(x, F(x)) + # GP.visualize(xtest, f_true=F, show = False) + # for j in range(p): + # plt.plot(xtest,embed_p.basis_fun(xtest,j+1)) + # plt.show() From 605d3ec6ebc0b6239ef4e838d6e7df78200b5d68 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Fri, 11 Oct 2024 13:42:25 +0200 Subject: [PATCH 05/39] rename poissonRateEstimator variables --- stpy/kernels.py | 4 +- .../log_link_rate_estimator.py | 4 +- .../poisson/link_fun_rate_estimator.py | 16 ++- .../poisson/mbr_positive_estimator.py | 12 +- .../point_processes/poisson_rate_estimator.py | 124 +++++++++--------- 5 files changed, 87 insertions(+), 73 deletions(-) diff --git a/stpy/kernels.py b/stpy/kernels.py index 1cacbb0..66fbd54 100755 --- a/stpy/kernels.py +++ b/stpy/kernels.py @@ -19,10 
+19,10 @@ def __init__( freq=None, groups=None, d=1, - gamma=1, + gamma: float = 1.0, ard_gamma=None, nu=1.5, - kappa=1, + kappa: float = 1.0, map=None, power=2, cov=None, diff --git a/stpy/point_processes/log_link_rate_estimator.py b/stpy/point_processes/log_link_rate_estimator.py index 54dcfef..9ee1086 100644 --- a/stpy/point_processes/log_link_rate_estimator.py +++ b/stpy/point_processes/log_link_rate_estimator.py @@ -243,8 +243,8 @@ def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): xtest = S.return_discretization(n) if self.data is None: return ( - self.b + 0 * xtest[:, 0].view(-1, 1), - self.b + 0 * xtest[:, 0].view(-1, 1), + self.min_intensity + 0 * xtest[:, 0].view(-1, 1), + self.min_intensity + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, 0].view(-1, 1), ) diff --git a/stpy/point_processes/poisson/link_fun_rate_estimator.py b/stpy/point_processes/poisson/link_fun_rate_estimator.py index ad0b3f1..3ecfef2 100644 --- a/stpy/point_processes/poisson/link_fun_rate_estimator.py +++ b/stpy/point_processes/poisson/link_fun_rate_estimator.py @@ -140,7 +140,7 @@ def product_integral(self, S): def get_constraints(self): s = self.get_m() - l = np.full(s, self.b) + l = np.full(s, self.min_intensity) u = np.full(s, self.B) Lambda = np.identity(s) return (l, Lambda, u) @@ -251,7 +251,7 @@ def construct_covariance_matrix_laplace(self): ].view(1, -1) k = np.maximum( torch.dot(self.observations[i, :], self.rate.view(-1)) ** 2, - self.b, + self.min_intensity, ) W = W + A / k W += 2 * self.sumLambda @@ -300,7 +300,7 @@ def map_lcb_ucb_approx(self, S, n, beta=2.0, delta=0.01): if self.data is None: return ( 0 * xtest[:, 0].view(-1, 1), - self.b + 0 * xtest[:, 0].view(-1, 1), + self.min_intensity + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, 0].view(-1, xtest.size()[0]), ) self.fit_ellipsoid_approx() @@ -335,8 +335,8 @@ def map_lcb_ucb(self, S, n, beta=2.0, delta=0.01): xtest = S.return_discretization(n) if self.data is None: return ( - self.b + 0 * xtest[:, 0].view(-1, 1), - self.b + 0 * xtest[:, 0].view(-1, 1), + self.min_intensity + 0 * xtest[:, 0].view(-1, 1), + self.min_intensity + 0 * xtest[:, 0].view(-1, 1), self.B + 0 * xtest[:, 0].view(-1, 1), ) @@ -467,7 +467,11 @@ def mean_rate(self, S, n=128): k = KernelFunction(gamma=gamma) estimator5 = PoissonRateEstimator( - process, hierarchical_structure, kernel_object=k, B=B, m=m, d=d + hierarchical_structure, + kernel=k, + max_intensity=B, + basis_size_per_dim=m, + d=d, ) estimator4 = PermanentalProcessRateEstimator( diff --git a/stpy/point_processes/poisson/mbr_positive_estimator.py b/stpy/point_processes/poisson/mbr_positive_estimator.py index 886817b..7d71a27 100644 --- a/stpy/point_processes/poisson/mbr_positive_estimator.py +++ b/stpy/point_processes/poisson/mbr_positive_estimator.py @@ -52,7 +52,7 @@ def mean_set(self, S, dt=1.0): emb = self.product_integral(S) * dt mu = torch.trace(emb @ self.rate).view(1, 1) else: - mu = self.b * S.volume() + mu = self.min_intensity * S.volume() return mu def penalized_likelihood(self, threads=4): @@ -221,7 +221,11 @@ def construct_covariance_matrix_bins(self): def mean_var_reg_set(self, S, dt=1.0, beta=2.0, lcb_compute=False): if self.data is None: - return S.volume() * self.b, S.volume() * self.B, S.volume() * self.b + return ( + S.volume() * self.min_intensity, + S.volume() * self.B, + S.volume() * self.min_intensity, + ) if self.approx_fit == False: self.W = self.construct_covariance_matrix() @@ -284,7 +288,7 @@ def band_no_opt(self, S, beta=2.0, dt=1.0, maximization=True): if maximization == 
True: return S.volume() * dt * self.B else: - return S.volume() * dt * self.b + return S.volume() * dt * self.min_intensity else: emb = self.product_integral(S) cost = torch.trace(self.rate @ emb) @@ -305,7 +309,7 @@ def gap(self, S, actions, w, dt, beta=2.0): """ if self.data is None: - return (self.B - self.b) * S.volume() / w(S) + return (self.B - self.min_intensity) * S.volume() / w(S) if self.ucb_identified == False: print("Recomputing UCB.....") diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index ef33832..c5331e8 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -1,7 +1,11 @@ +from typing import Optional import cvxpy as cp import mosek import numpy as np import scipy +from stpy.borel_set import HierarchicalBorelSets +from stpy.embeddings.embedding import Embedding +from stpy.kernels import KernelFunction import torch from autograd_minimize import minimize from quadprog import solve_qp @@ -26,23 +30,22 @@ class PoissonRateEstimator(RateEstimator): def __init__( self, - process, - hierarchy, - d=1, - m=100, - kernel_object=None, - B=1.0, + anchor_hierarchy: HierarchicalBorelSets, + d: int = 1, + basis_size_per_dim: int = 100, + kernel: Optional[KernelFunction] = None, + max_intensity: float = 1.0, s=1.0, jitter=10e-8, - b=0.0, - basis="triangle", - estimator="likelihood", - feedback="count-record", + min_intensity: float = 0.0, + basis: str = "triangle", + estimator: str = "likelihood", + feedback_type: str = "count-record", offset=0.1, uncertainty="laplace", approx=None, - stepsize=None, - embedding=None, + sampling_stepsize=None, + embedding: Optional[Embedding] = None, beta=2.0, sampling="proximal+prox", peeking=True, @@ -50,32 +53,33 @@ def __init__( var_cor_on=True, samples_nystrom=15000, inverted_constraint=False, - steps=None, - dual=True, + langevine_sampling_steps=None, + use_anchors=True, no_anchor_points=1024, U=1.0, - opt="torch", + optimization_library="torch", ): - - self.process = process self.d = d + """ Dimension of the data """ self.s = s - self.b = b - self.B = B + self.b = min_intensity + """ Minimal value of the intensity function """ + self.B = max_intensity + """ Maximal value of the intensity function """ self.U = U - self.stepsize = stepsize + self.stepsize = sampling_stepsize self.sampling = sampling - self.steps = steps - self.opt = opt - self.kernel_object = kernel_object + self.steps = langevine_sampling_steps + self.optimization_library = optimization_library + self.kernel = kernel # set hierarchy self.constraints = constraints - self.hierarchy = hierarchy + self.hierarchy = anchor_hierarchy self.ucb_identified = False self.inverted_constraint = inverted_constraint # approximation self.loglikelihood = 0.0 - self.dual = dual + self.dual = use_anchors self.peeking = peeking self.no_anchor_points = no_anchor_points if beta < 0.0: @@ -87,40 +91,40 @@ def __init__( if basis == "triangle": self.packing = TriangleEmbedding( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), ) elif basis == "bernstein": self.packing = BernsteinEmbedding( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), ) elif basis == "splines": self.packing = BernsteinSplinesEmbedding( d, - m, - kernel_object=kernel_object, - B=B, - 
b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), ) elif basis == "nystrom": self.packing = PositiveNystromEmbeddingBump( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), samples=samples_nystrom, @@ -128,39 +132,41 @@ def __init__( elif basis == "overlap-splines": self.packing = BernsteinSplinesOverlapping( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), ) elif basis == "faber": self.packing = FaberSchauderEmbedding( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), ) elif basis == "optimal-positive": self.packing = OptimalPositiveBasis( d, - m, - kernel_object=kernel_object, - B=B, - b=b, + basis_size_per_dim, + kernel_object=kernel, + B=max_intensity, + b=min_intensity, offset=offset, s=np.sqrt(jitter), samples=samples_nystrom, ) elif basis == "custom": + assert embedding is not None self.packing = embedding else: raise NotImplementedError("The request positive basis is not implemented.") - self.m = m + self.m = basis_size_per_dim + """ Number of basis functions per dimension """ self.data = None self.covariance = False @@ -173,7 +179,7 @@ def __init__( # properties of rate estimator self.estimator = estimator - self.feedback = feedback + self.feedback = feedback_type self.uncertainty = uncertainty self.approx = approx @@ -206,7 +212,7 @@ def __init__( self.global_dt = 0.0 self.anchor_points_emb = self.packing.embed(self.anchor_points) - if feedback == "count-record" and basis != "custom": + if feedback_type == "count-record" and basis != "custom": print("Precomputing phis.") for index_set, set in enumerate(self.basic_sets): self.varphis[index_set, :] = self.packing.integral(set) @@ -284,9 +290,9 @@ def fit_gp(self, threads=4): if self.feedback == "count-record": if self.estimator == "likelihood": - if self.opt == "cvxpy": + if self.optimization_library == "cvxpy": self.penalized_likelihood(threads=threads) - elif self.opt == "torch": + elif self.optimization_library == "torch": self.penalized_likelihood_fast(threads=threads) else: raise NotImplementedError( @@ -1188,7 +1194,7 @@ def sample_hmc(self, steps=1000, stepsize=None, verbose=False): def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1): from stpy.approx_inference.variational_mf import VMF_SGCP - cov_params = [self.kernel_object.kappa, self.kernel_object.gamma] + cov_params = [self.kernel.kappa, self.kernel.gamma] S_borders = np.array([[-1.0, 1.0]]) num_inducing_points = self.m num_integration_points = 256 From e29a590c695a3613e1accf44cf8b3aace77a3734 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 12 Oct 2024 10:01:11 +0200 Subject: [PATCH 06/39] choose optimization library in fit method --- stpy/point_processes/poisson_rate_estimator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index c5331e8..b71cb94 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -284,15 +284,16 @@ def get_constraints(self): def cov(self, inverse=False): return self.packing.cov(inverse=inverse) - def 
fit_gp(self, threads=4): + def fit_gp(self, threads=4, optimization_library=None): + optimization_library = optimization_library if optimization_library is not None else self.optimization_library if self.data is not None: if self.feedback == "count-record": if self.estimator == "likelihood": - if self.optimization_library == "cvxpy": + if optimization_library == "cvxpy": self.penalized_likelihood(threads=threads) - elif self.optimization_library == "torch": + elif optimization_library == "torch": self.penalized_likelihood_fast(threads=threads) else: raise NotImplementedError( From 08b1a099205b693496fdcf9e3ac75e9e7fbd1f22 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 12 Oct 2024 10:59:38 +0200 Subject: [PATCH 07/39] revert to mosek --- stpy/helpers/ellipsoid_algorithms.py | 6 ++- .../point_processes/poisson_rate_estimator.py | 38 +++++++++++++++++-- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/stpy/helpers/ellipsoid_algorithms.py b/stpy/helpers/ellipsoid_algorithms.py index c485943..6bf796d 100644 --- a/stpy/helpers/ellipsoid_algorithms.py +++ b/stpy/helpers/ellipsoid_algorithms.py @@ -228,7 +228,11 @@ def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u): constraints.append(Lambda @ theta >= l) constraints.append(Lambda @ theta <= u) prob = cp.Problem(obj_max, constraints) - prob.solve(solver=cp.SCS, verbose=True) + prob.solve( + solver=cp.MOSEK, + verbose=False, + mosek_params={mosek.iparam.intpnt_solve_form: mosek.solveform.dual}, + ) val = prob.value theta = theta.value return val, theta diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index b71cb94..60f8c4b 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -1,3 +1,4 @@ +import os from typing import Optional import cvxpy as cp import mosek @@ -285,7 +286,11 @@ def cov(self, inverse=False): return self.packing.cov(inverse=inverse) def fit_gp(self, threads=4, optimization_library=None): - optimization_library = optimization_library if optimization_library is not None else self.optimization_library + optimization_library = ( + optimization_library + if optimization_library is not None + else self.optimization_library + ) if self.data is not None: if self.feedback == "count-record": @@ -1349,7 +1354,10 @@ def objective(theta): print(res.message) return self.rate - def penalized_likelihood(self, threads=4): + def penalized_likelihood(self, threads=None): + if threads is None: + cpu_count = os.cpu_count() + threads = max(cpu_count - 2, 1) if cpu_count is not None else 1 theta = cp.Variable(self.get_m()) l, Lambda, u = self.get_constraints() @@ -1411,7 +1419,18 @@ def penalized_likelihood(self, threads=4): theta.value = self.rate.numpy() try: - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-4, + mosek.dparam.intpnt_co_tol_dfeas: 1e-4, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, + }, + ) self.rate = torch.from_numpy(theta.value) return self.rate @@ -1443,7 +1462,18 @@ def penalized_likelihood_integral(self, threads=4): # theta.value = self.rate.numpy() try: prob = cp.Problem(objective, constraints) - prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) + prob.solve( + solver=cp.MOSEK, + warm_start=False, + verbose=False, + mosek_params={ + 
mosek.iparam.num_threads: threads, + mosek.iparam.intpnt_solve_form: mosek.solveform.dual, + mosek.dparam.intpnt_co_tol_pfeas: 1e-4, + mosek.dparam.intpnt_co_tol_dfeas: 1e-4, + mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, + }, + ) self.rate = torch.from_numpy(theta.value) except: print("Optimization failed. Using the old value.") From 605f52ac33182d73e7ea5fa00741a816294fa72d Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 12 Oct 2024 12:38:17 +0200 Subject: [PATCH 08/39] remove weird duplicate handling --- stpy/point_processes/rate_estimator.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index 1082251..356d537 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -54,11 +54,6 @@ def load_data(self, data: List, times=True): x.append(obs) if obs is not None: - obs, _, duplicates = torch.unique( - obs, dim=0, return_inverse=True, return_counts=True - ) - obs = torch.einsum("ij,i->ij", obs, duplicates) - if times == True: emb = self.packing.embed(obs) * dt else: From c85b6a4b17463da79be7d5bf8941a3e5adc34b02 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 12 Oct 2024 12:41:46 +0200 Subject: [PATCH 09/39] most minor type fixes --- stpy/embeddings/positive_embedding.py | 4 ++-- stpy/point_processes/poisson/poisson.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index 340c094..f41c332 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -69,8 +69,8 @@ def __init__( def get_size(self): return self.m**self.d - def integral(self, S): - pass + def integral(self, S) -> torch.Tensor: + raise NotImplementedError("Subclasses should implement this method.") def basis_fun(self, x, j): """ diff --git a/stpy/point_processes/poisson/poisson.py b/stpy/point_processes/poisson/poisson.py index 843602c..52c6576 100644 --- a/stpy/point_processes/poisson/poisson.py +++ b/stpy/point_processes/poisson/poisson.py @@ -11,7 +11,7 @@ class PoissonPointProcess: """ - def __init__(self, d=1, B=1, b=0.2, rate=None, rate_volume=None): + def __init__(self, d=1.0, B=1.0, b=0.2, rate=None, rate_volume=None): self.B = B self.d = d self.b = b From 1f735cc968f6e5219873bf16f307f225bebcb105 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 12 Oct 2024 13:28:02 +0200 Subject: [PATCH 10/39] Optimize objective and add option to move to gpu --- .../point_processes/poisson_rate_estimator.py | 56 ++++++++++++------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index 60f8c4b..fd0db1e 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -285,7 +285,12 @@ def get_constraints(self): def cov(self, inverse=False): return self.packing.cov(inverse=inverse) - def fit_gp(self, threads=4, optimization_library=None): + def fit_gp( + self, + threads=4, + optimization_library=None, + device: torch.device = torch.get_default_device(), + ): optimization_library = ( optimization_library if optimization_library is not None @@ -299,7 +304,7 @@ def fit_gp(self, threads=4, optimization_library=None): if optimization_library == "cvxpy": self.penalized_likelihood(threads=threads) elif optimization_library == "torch": - self.penalized_likelihood_fast(threads=threads) + 
self.penalized_likelihood_fast(device=device) else: raise NotImplementedError( "The optimization method does not exist" @@ -1274,58 +1279,68 @@ def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1): ucb = torch.quantile(paths, 1 - delta, dim=0) return lcb, ucb - def penalized_likelihood_fast(self, threads=4): + def penalized_likelihood_fast( + self, device: torch.device = torch.get_default_device() + ): l, Lambda, u = self.get_constraints() Gamma_half, invGamma_half = self.cov(inverse=True) + invGamma_half = invGamma_half.to(device) + + s = self.s * 0.5 if self.dual == False: + p = self.phis.to(device) @ invGamma_half # using all points without anchor points if self.observations is not None: + o = self.observations.to(device) @ invGamma_half def objective(theta): return ( - -torch.sum(torch.log(self.observations @ invGamma_half @ theta)) - + torch.sum(self.phis @ invGamma_half @ theta) - + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + -torch.sum(torch.log(o @ theta)) + + torch.sum(p @ theta) + + s * torch.sum((invGamma_half @ theta) ** 2) ) else: def objective(theta): - return torch.sum( - self.phis @ invGamma_half @ theta - ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + return torch.sum(p @ theta) + s * torch.sum( + (invGamma_half @ theta) ** 2 + ) else: # using anchor points mask = self.bucketized_counts > 0 phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask] + tau = self.total_bucketized_time[mask].to(device) + p = phis @ invGamma_half if self.observations is not None: - observations = self.anchor_points_emb - weights = self.anchor_weights + observations = self.anchor_points_emb.to(device) + weights = self.anchor_weights.to(device) mask = weights > 0.0 + o = observations[mask, :] @ invGamma_half + def objective(theta): return ( -torch.einsum( "i,i", weights[mask], - torch.log(observations[mask, :] @ invGamma_half @ theta), + torch.log(o @ theta), ) - + torch.einsum("i,i", tau, phis @ invGamma_half @ theta) - + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + + torch.einsum("i,i", tau, p @ theta) + + s * torch.sum((invGamma_half @ theta) ** 2) ) else: def objective(theta): - return torch.einsum( - "i,i", tau, phis @ invGamma_half @ theta - ) + self.s * 0.5 * torch.sum((invGamma_half @ theta) ** 2) + return torch.einsum("i,i", tau, p @ theta) + s * torch.sum( + (invGamma_half @ theta) ** 2 + ) - if self.rate is not None: + if isinstance(self.rate, torch.Tensor): theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() theta0.data = self.rate.data else: @@ -1340,6 +1355,7 @@ def objective(theta): bounds=(l[0] + eps, u[0]), precision="float64", tol=1e-8, + torch_device=str(device), options={ "ftol": 1e-08, "gtol": 1e-08, @@ -1350,7 +1366,7 @@ def objective(theta): }, ) - self.rate = invGamma_half @ torch.from_numpy(res.x) + self.rate = invGamma_half.cpu() @ torch.from_numpy(res.x) print(res.message) return self.rate From e37f339c61f99725d15f34a58caaa66fb7d08477 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 13 Oct 2024 18:24:51 +0200 Subject: [PATCH 11/39] Efficient Triangle embedding class --- stpy/embeddings/triangle_base.py | 85 +++++++++++++++++++++++++++++++ tests/efficient_triangle_basis.py | 31 +++++++++++ 2 files changed, 116 insertions(+) create mode 100644 stpy/embeddings/triangle_base.py create mode 100644 tests/efficient_triangle_basis.py diff --git a/stpy/embeddings/triangle_base.py b/stpy/embeddings/triangle_base.py new file mode 100644 index 0000000..251f538 --- /dev/null +++ 
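To make the structure of the torch objective above concrete, here is a small self-contained sketch of the penalized Poisson-process likelihood it optimizes, on synthetic data, without the invGamma_half whitening, and with positivity enforced by a log-reparametrization instead of the box constraints used in the class:

    import torch

    torch.manual_seed(0)
    n, m = 50, 8
    Phi_obs = torch.rand(n, m, dtype=torch.float64)      # phi(x_i) at observed points, nonnegative
    psi = torch.rand(m, dtype=torch.float64) + 0.5       # integral of each basis function over the window
    s = 1e-2                                             # ridge weight

    def objective(theta):
        # penalized negative log-likelihood of an inhomogeneous Poisson process:
        #   -sum_i log(theta^T phi(x_i)) + theta^T psi + (s/2) ||theta||^2
        return (-torch.log(Phi_obs @ theta).sum()
                + psi @ theta
                + 0.5 * s * theta.pow(2).sum())

    log_theta = torch.zeros(m, dtype=torch.float64, requires_grad=True)
    opt = torch.optim.LBFGS([log_theta], max_iter=200, line_search_fn="strong_wolfe")

    def closure():
        opt.zero_grad()
        loss = objective(log_theta.exp())
        loss.backward()
        return loss

    opt.step(closure)
    print("fitted rate weights:", log_theta.exp().detach())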
b/stpy/embeddings/triangle_base.py @@ -0,0 +1,85 @@ +import numpy as np +import scipy +import torch + +from stpy.borel_set import BorelSet +from stpy.continuous_processes.nystrom_fea import NystromFeatures +from stpy.embeddings.positive_embedding import PositiveEmbedding +from stpy.kernels import KernelFunction + + +class EfficientTriangleEmbedding(PositiveEmbedding): + + def __init__(self, *args, **kwargs): + + super().__init__(*args, **kwargs) + + self._t = torch.linspace( + self.interval[0], self.interval[1], steps=self.m, dtype=torch.float64 + ) + self._dm = (self.interval[1] - self.interval[0]) / (self.m - 1) + + def basis_fun(self, x: torch.Tensor, j: int): + r""" + Return the value of 1d basis function $\phi_{j}$ + over all dimensions of x + + :param x: double, need to be in the interval + :param j: integer, index of hat functions, 0 <= j <= m-1 + :return: $\{\phi_j(x_1), \ldots, \phi_j(x_n)}$ + """ + res = torch.clamp(1 - torch.abs((x - self._t[j]) / self._dm), min=0) + return res + + def integrate_1d(self, a: torch.Tensor, b: torch.Tensor, t: torch.Tensor): + """ + :param l: from + :param u: to + :param t: tensor of triangle centers + :return: 1d integral over triangle basis functions given by centers and self.dm + """ + + def rising_integral(x): + return (x - t + self._dm) ** 2 / (self._dm * 2.0) + + def falling_integral(x): + return -((x - t - self._dm) ** 2) / (self._dm * 2.0) + + i = rising_integral(torch.clamp(b, t - self._dm, t)) - rising_integral( + torch.clamp(a, t - self._dm, t) + ) + i += falling_integral(torch.clamp(b, t, t + self._dm)) - falling_integral( + torch.clamp(a, t, t + self._dm) + ) + + return i + + def integral(self, S): + """ + Integrate the Phi(x) over S + :param S: borel set + :return: $\int_S \Phi(x) dx$ + """ + if S in self.procomp_integrals.keys(): + return self.procomp_integrals[S] + + else: + assert S.d == self.d + psi = torch.ones(self.m).double() + if S.type == "box": + psi = torch.tensor([1.0]).double() + for i in range(self.d): + a, b = S.bounds[i, 0].double(), S.bounds[i, 1].double() + p = self.integrate_1d(a, b, self._t) + # multiply each with each element and flatten + psi = torch.outer(psi, p).flatten() + + elif S.type == "round": + weights, nodes = S.return_legendre_discretization(30) + vals = self.embed_internal(nodes) + psi = weights.view(1, -1) @ vals + + Gamma_half = self.cov() + emb = psi @ Gamma_half + self.procomp_integrals[S] = emb + return emb diff --git a/tests/efficient_triangle_basis.py b/tests/efficient_triangle_basis.py new file mode 100644 index 0000000..13faf5c --- /dev/null +++ b/tests/efficient_triangle_basis.py @@ -0,0 +1,31 @@ +from stpy.borel_set import BorelSet, HierarchicalBorelSets +from stpy.embeddings.bump_bases import TriangleEmbedding +from stpy.embeddings.triangle_base import EfficientTriangleEmbedding +import torch + + +if __name__ == "__main__": + d = 1 + m = 2 + S = BorelSet(1, torch.tensor([[-1, 1]])) + + inefficient = TriangleEmbedding(d=d, m=m, interval=(-1, 1)) + efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1)) + + for x, j in [(0.5, 1), (0.1, 0)]: + x = torch.tensor(x) + assert inefficient.basis_fun(x, j) == efficient.basis_fun(x, j) + + for set in [[-1, 1], [-0.25, 1], [-2, 2]]: + s = BorelSet(1, torch.tensor([set])) + assert torch.all(inefficient.integral(s) == efficient.integral(s)) + + d = 2 + m = 2 + + inefficient = TriangleEmbedding(d=d, m=m, interval=(-1, 1)) + efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1)) + + for x, j in [([0.5, 0.1], 1), ([0.7, 0.1], 0)]: + x = 
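As a sanity check on the closed-form integrate_1d introduced above, the same hat-function integral can be reproduced standalone and compared against trapezoidal quadrature (plain floats, no stpy imports):

    import torch

    def hat(x, t, dm):
        # triangular "hat" basis function centred at t with half-width dm
        return torch.clamp(1 - torch.abs((x - t) / dm), min=0)

    def hat_integral(a, b, t, dm):
        # closed form of int_a^b hat(x) dx: rising branch on [t-dm, t], falling branch on [t, t+dm]
        rising = lambda x: (x - t + dm) ** 2 / (2 * dm)
        falling = lambda x: -((x - t - dm) ** 2) / (2 * dm)
        i = rising(min(max(b, t - dm), t)) - rising(min(max(a, t - dm), t))
        i += falling(min(max(b, t), t + dm)) - falling(min(max(a, t), t + dm))
        return i

    a, b, t, dm = -0.3, 0.7, 0.2, 0.5
    xs = torch.linspace(a, b, 20001, dtype=torch.float64)
    numeric = torch.trapz(hat(xs, t, dm), xs)
    print(hat_integral(a, b, t, dm), float(numeric))   # both ~0.5, agreeing to ~1e-8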
torch.tensor(x) + assert torch.all(inefficient.basis_fun(x, j) == efficient.basis_fun(x, j)) From ded12a20b8f06c77ba1aab95b03cab438745b2f8 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 17 Oct 2024 12:14:29 +0200 Subject: [PATCH 12/39] Make comment strings raw if they contain a backslash --- stpy/embeddings/bernstein_embedding.py | 6 +++--- stpy/embeddings/bump_bases.py | 10 +++++----- stpy/embeddings/embedding.py | 2 +- stpy/embeddings/polynomial_embedding.py | 2 +- stpy/embeddings/positive_embedding.py | 2 +- stpy/embeddings/triangle_base.py | 2 +- stpy/helpers/ellipsoid_algorithms.py | 16 ++++++++-------- stpy/optim/custom_optimizers.py | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/stpy/embeddings/bernstein_embedding.py b/stpy/embeddings/bernstein_embedding.py index 2da90a5..68467c6 100644 --- a/stpy/embeddings/bernstein_embedding.py +++ b/stpy/embeddings/bernstein_embedding.py @@ -11,7 +11,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def basis_fun(self, x, j): # 1d - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval :param j: integer, index of hat functions, 0 <= j <= m-1 @@ -136,7 +136,7 @@ def __init__(self, *args, degree=4, **kwargs): self.degree = degree def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval @@ -228,7 +228,7 @@ def __init__(self, *args, degree=4, derivatives=2, **kwargs): # def basis_fun(self, x, j, k, derivative = 0, extrapolate = False): #1d def basis_fun(self, x, q, derivative=0, extrapolate=False): # 1d - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py index 587daf4..7b99443 100644 --- a/stpy/embeddings/bump_bases.py +++ b/stpy/embeddings/bump_bases.py @@ -16,7 +16,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def basis_fun(self, x, j): - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval @@ -74,10 +74,10 @@ def integrate_1d(self, a, b, tj, dm): return vol def integral(self, S): - """ + r""" Integrate the Phi(x) over S :param S: borel set - :return: + :return: $\int_S \Phi(x) dx$ """ if S in self.procomp_integrals.keys(): return self.procomp_integrals[S] @@ -150,7 +150,7 @@ def __init__(self, *args, **kwargs): raise AssertionError("This basis works only with log_2(n) is integer.") def basis_fun(self, x, j): - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval @@ -283,7 +283,7 @@ def integral(self, S): psi[j] = vol def basis_fun(self, x, j): # 1d - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval diff --git a/stpy/embeddings/embedding.py b/stpy/embeddings/embedding.py index 45e0dcf..021dfa7 100755 --- a/stpy/embeddings/embedding.py +++ b/stpy/embeddings/embedding.py @@ -6,7 +6,7 @@ __email__ = "mojmir.mutny@inf.ethz.ch" __status__ = "DEV" -""" +r""" This file implements code used in paper: Mojmir Mutny & Andreas Krause, "Efficient High Dimensional Bayesian Optimization diff --git a/stpy/embeddings/polynomial_embedding.py b/stpy/embeddings/polynomial_embedding.py index 5fbd50c..2634641 100755 --- a/stpy/embeddings/polynomial_embedding.py +++ b/stpy/embeddings/polynomial_embedding.py @@ -6,7 +6,7 @@ __email__ = 
"mojmir.mutny@inf.ethz.ch" __status__ = "DEV" -""" +r""" This file implements a polynomial embedding k(x,y) = \Phi(x)^\top \Phi(y) for kernels of the form (x^\top y + 1)^p diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index f41c332..2c2d510 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -73,7 +73,7 @@ def integral(self, S) -> torch.Tensor: raise NotImplementedError("Subclasses should implement this method.") def basis_fun(self, x, j): - """ + r""" Return the value of basis function \phi_j(x) :param x: double, need to be in the interval diff --git a/stpy/embeddings/triangle_base.py b/stpy/embeddings/triangle_base.py index 251f538..2e43676 100644 --- a/stpy/embeddings/triangle_base.py +++ b/stpy/embeddings/triangle_base.py @@ -55,7 +55,7 @@ def falling_integral(x): return i def integral(self, S): - """ + r""" Integrate the Phi(x) over S :param S: borel set :return: $\int_S \Phi(x) dx$ diff --git a/stpy/helpers/ellipsoid_algorithms.py b/stpy/helpers/ellipsoid_algorithms.py index 6bf796d..2514883 100644 --- a/stpy/helpers/ellipsoid_algorithms.py +++ b/stpy/helpers/ellipsoid_algorithms.py @@ -7,7 +7,7 @@ def maximum_volume_ellipsoid_l1_polytope_ellipse(ellipse, l1_polytope, verbose=False): - """ + r""" ellipse is xA_ix + 2b_i x + c_i \leq 0 @@ -129,7 +129,7 @@ def maximum_volume_ellipsoid_relu_polytope_ellipse( def maximum_volume_ellipsoid_intersection_ellipsoids( ellipses, planes=None, verbose=False ): - """ + r""" Each ellipse is xA_ix + 2b_i x + c_i \leq 0 @@ -185,7 +185,7 @@ def maximum_volume_ellipsoid_intersection_ellipsoids( def ellipsoid_cut(c, B, a, beta): - """ + r""" :param c: elipsoid center :param B: elipsoid covariance :param a: a @@ -212,7 +212,7 @@ def ellipsoid_cut(c, B, a, beta): def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u): - """ + r""" solves the problem min x^\top \theta s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c @@ -239,7 +239,7 @@ def maximize_on_elliptical_slice(x, Sigma, mu, c, l, Lambda, u): def maximize_matrix_quadratic_on_ellipse(X, Sigma, mu, c, threads=4): - """ + r""" solves the problem max \theta ^top Z \theta s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c @@ -251,7 +251,7 @@ def maximize_matrix_quadratic_on_ellipse(X, Sigma, mu, c, threads=4): def minimize_matrix_quadratic_on_ellipse(Z, Sigma, mu, c, threads=4): - """ + r""" solves the problem min \theta ^top Z \theta s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c @@ -278,7 +278,7 @@ def minimize_matrix_quadratic_on_ellipse(Z, Sigma, mu, c, threads=4): def maximize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4): - """ + r""" solves the problem max (x^\top \theta)^2 s.t. (\theta - \mu)Sigma(\theta - \mu) \leq c @@ -291,7 +291,7 @@ def maximize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4): def minimize_quadratic_on_ellipse(x, Sigma, mu, c, threads=4): - """ + r""" solves the problem min (x^\top \theta)^2 s.t. 
(\theta - \mu)Sigma(\theta - \mu) \leq c diff --git a/stpy/optim/custom_optimizers.py b/stpy/optim/custom_optimizers.py index b3f5bc7..043ae8e 100644 --- a/stpy/optim/custom_optimizers.py +++ b/stpy/optim/custom_optimizers.py @@ -130,7 +130,7 @@ def QPQC_problem(A, a, s, Sigma=None): def convex_QCQP(A, a, s, Sigma=None, threads=4, verbose=False): - """ + r""" Solving min xAx - 2ax From a9d2e01a401fce99b350245c483facd35602cb13 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 10 Nov 2024 11:47:21 +0100 Subject: [PATCH 13/39] more data type robustness --- stpy/embeddings/bump_bases.py | 4 ++-- stpy/embeddings/positive_embedding.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py index 7b99443..48b08a5 100644 --- a/stpy/embeddings/bump_bases.py +++ b/stpy/embeddings/bump_bases.py @@ -92,7 +92,7 @@ def integral(self, S): for j in range(self.get_m()): tj = self.interval[0] + j * dm vol = self.integrate_1d(a.numpy(), b.numpy(), tj, dm) - psi[j] = vol + psi[j] = float(vol) elif self.d == 2: dm = (self.interval[1] - self.interval[0]) / (self.m - 1) # delta m @@ -108,7 +108,7 @@ def integral(self, S): # center_point = torch.Tensor( [tj,tk]).view(-1,1) vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm) vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm) - psi[j] = vol * vol2 + psi[j] = float(vol * vol2) # if torch.sum(S.is_inside(center_point)): # psi[j] = (dm**2)/3. else: diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index 2c2d510..635af03 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -19,8 +19,8 @@ def __init__( m, kernel_object: Optional[KernelFunction] = None, interval=(-1, 1), - B=1, - b=0, + B=1.0, + b=0.0, s=0.001, offset=0.0, ): From fb268859336eb1403d03273639fc264d4e5b3e1c Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 10 Nov 2024 11:47:44 +0100 Subject: [PATCH 14/39] link_fun_rate running --- stpy/point_processes/link_fun_rate_estimator.py | 4 ++-- stpy/point_processes/poisson/__init__.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py index afac5c7..c5a5b69 100644 --- a/stpy/point_processes/link_fun_rate_estimator.py +++ b/stpy/point_processes/link_fun_rate_estimator.py @@ -17,14 +17,14 @@ minimize_matrix_quadratic_on_ellipse, ) from stpy.point_processes.poisson import PoissonPointProcess -from stpy.point_processes.poisson_rate_estimator import PositiveRateEstimator +from stpy.point_processes.poisson_rate_estimator import PoissonRateEstimator from stpy.borel_set import BorelSet, HierarchicalBorelSets from stpy.kernels import KernelFunction ## implement loading data -class PermanentalProcessRateEstimator(PositiveRateEstimator): +class PermanentalProcessRateEstimator(PoissonRateEstimator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/stpy/point_processes/poisson/__init__.py b/stpy/point_processes/poisson/__init__.py index e69de29..32fcc1a 100644 --- a/stpy/point_processes/poisson/__init__.py +++ b/stpy/point_processes/poisson/__init__.py @@ -0,0 +1 @@ +from .poisson import PoissonPointProcess From 5f062a3acc68f147a89946ebf4eb28dd3fc92e99 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 10 Nov 2024 11:48:00 +0100 Subject: [PATCH 15/39] fix efficient triangle basis test --- tests/efficient_triangle_basis.py | 14 +++++++++----- 1 
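The reason for the r-prefix added to the docstrings above: in a normal string literal, backslash sequences are escapes, so LaTeX markup such as \theta or \phi can be silently altered or trigger warnings. A short illustration:

    # "\t" is a recognised escape (tab); unrecognised ones such as "\p" emit a
    # DeprecationWarning/SyntaxWarning. The r prefix keeps the backslashes verbatim.
    plain = "value of basis function \theta_j(x)"   # "\t" silently becomes a tab
    raw = r"value of basis function \theta_j(x)"    # backslash preserved
    print(repr(plain))
    print(repr(raw))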
file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/efficient_triangle_basis.py b/tests/efficient_triangle_basis.py index 13faf5c..305a5a2 100644 --- a/tests/efficient_triangle_basis.py +++ b/tests/efficient_triangle_basis.py @@ -13,12 +13,14 @@ efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1)) for x, j in [(0.5, 1), (0.1, 0)]: - x = torch.tensor(x) - assert inefficient.basis_fun(x, j) == efficient.basis_fun(x, j) + x = torch.tensor(x, dtype=torch.float64) + assert torch.allclose( + inefficient.basis_fun(x, j).double(), efficient.basis_fun(x, j) + ) for set in [[-1, 1], [-0.25, 1], [-2, 2]]: s = BorelSet(1, torch.tensor([set])) - assert torch.all(inefficient.integral(s) == efficient.integral(s)) + assert torch.allclose(inefficient.integral(s), efficient.integral(s)) d = 2 m = 2 @@ -27,5 +29,7 @@ efficient = EfficientTriangleEmbedding(d, m, interval=(-1, 1)) for x, j in [([0.5, 0.1], 1), ([0.7, 0.1], 0)]: - x = torch.tensor(x) - assert torch.all(inefficient.basis_fun(x, j) == efficient.basis_fun(x, j)) + x = torch.tensor(x, dtype=torch.float64) + assert torch.allclose( + inefficient.basis_fun(x, j).double(), efficient.basis_fun(x, j) + ) From 111a717316bbaa6db45ef5d2ce4eb7abf1889309 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 10 Nov 2024 11:50:08 +0100 Subject: [PATCH 16/39] precompute phis with custom basis, comments and sampling as list --- .../point_processes/poisson_rate_estimator.py | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index fd0db1e..2792e06 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -192,11 +192,11 @@ def __init__( self.variances_histogram = [] self.observations = None self.rate = None + r""" $\hat \theta$ in the paper""" self.W = (s) * torch.eye(self.get_m()).double() self.W_inv_approx = (1.0 / s) * torch.eye(self.get_m()).double() self.beta_value = 2.0 self.sampled_theta = None - if self.dual == True: if self.d == 1: anchor = no_anchor_points @@ -213,7 +213,7 @@ def __init__( self.global_dt = 0.0 self.anchor_points_emb = self.packing.embed(self.anchor_points) - if feedback_type == "count-record" and basis != "custom": + if feedback_type == "count-record": print("Precomputing phis.") for index_set, set in enumerate(self.basic_sets): self.varphis[index_set, :] = self.packing.integral(set) @@ -224,6 +224,19 @@ def __init__( print("Precomputation finished.") def add_data_point(self, new_data, times=True): + r""" + Takes data in the format (area: BorelSet, data_points: Tensor, time_delta: float) + where data_points is a 2d tensor, with number of columns equal to d + and number of rows equal to the number of point observations + + It triggers a re-fitting of the approximation parameters $\hat \theta$ + and adds + + - the integral over the sensing area plus the log of the integral over the sensing area if the data is of type histogram + - the integral over the sensing are plus the sum of the rate function at the datapoints if the data is of type count-record + + to `self.loglikelihood` + """ super().add_data_point(new_data, times=times) @@ -588,6 +601,8 @@ def prox(x): # def prox(x): # return Gamma_half @ torch.from_numpy(scipy.optimize.nnls(invGamma.numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1) + samples = [] + if self.data is not None: if self.feedback == "count-record" and self.dual == False: if 
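To illustrate the data format documented in the add_data_point docstring above, a minimal sketch of one count-record observation, assuming BorelSet is constructed with (d, bounds) as in the tests; the estimator construction itself is omitted and the final call is only indicated:

    import torch
    from stpy.borel_set import BorelSet

    # sensing window: a 2-d box [0, 1] x [0, 1]
    area = BorelSet(2, torch.tensor([[0.0, 1.0], [0.0, 1.0]]).double())
    # observed event locations inside that window (one row per point, one column per dimension)
    points = torch.tensor([[0.2, 0.3], [0.5, 0.5], [0.9, 0.1]]).double()
    dt = 1.0   # duration over which the window was observed

    observation = (area, points, dt)
    # estimator.add_data_point(observation)   # would re-fit theta-hat and update the log-likelihood terms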
self.observations is not None: @@ -686,8 +701,12 @@ def prox(x): if verbose == True: print("Iter:", k, theta.T) + samples.append(prox(theta)) + self.sampled_theta = prox(theta) + return samples + def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False): Gamma_half, invGamma_half = self.packing.cov(inverse=True) @@ -1225,11 +1244,11 @@ def sample_variational(self, xtest, accuracy=1e-4, verbose=False, samples=1): sample_paths = var_mf_sgcp.sample_posterior(xtest, num_samples=1.0) return sample_paths - def sample(self, verbose=False, steps=1000, domain=None): + def sample(self, verbose=False, steps=None, domain=None): """ :return: """ - if self.steps is not None: + if steps is None: steps = self.steps if self.stepsize is not None: @@ -1238,34 +1257,33 @@ def sample(self, verbose=False, steps=1000, domain=None): stepsize = None l, Lambda, u = self.get_constraints() - print("Sampling started.") if self.rate is None: self.fit_gp() if self.sampling == "mirror": - self.sample_mirror_langevin(steps=steps, verbose=verbose) + r = self.sample_mirror_langevin(steps=steps, verbose=verbose) elif self.sampling == "proximal+prox": - self.sample_proximal_langevin_prox(steps=steps, verbose=verbose) + r = self.sample_proximal_langevin_prox(steps=steps, verbose=verbose) elif self.sampling == "proximal+simple_prox": - self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose) + r = self.sample_proximal_langevin_simple_prox(steps=steps, verbose=verbose) elif self.sampling == "hessian": - self.sample_hessian_positive_langevin( + r = self.sample_hessian_positive_langevin( steps=steps, verbose=verbose, stepsize=stepsize ) elif self.sampling == "hessian2": - self.sample_hessian_positive_langevin_2( + r = self.sample_hessian_positive_langevin_2( steps=steps, verbose=verbose, stepsize=stepsize ) elif self.sampling == "mla_prime": - self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize) + r = self.sample_mla_prime(steps=steps, verbose=verbose, stepsize=stepsize) elif self.sampling == "hmc": - self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize) + r = self.sample_hmc(steps=steps, verbose=verbose, stepsize=stepsize) elif self.sampling == "polyia_variational": - self.sample_variational(accuracy=1.0 / steps, verbose=verbose) + r = self.sample_variational(accuracy=1.0 / steps, verbose=verbose) else: raise NotImplementedError("Sampling of such is not supported.") - print("Sampling finished.") + return r def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1): paths = [] @@ -1349,7 +1367,7 @@ def objective(theta): eps = 1e-4 res = minimize( objective, - theta0.numpy(), + theta0.cpu().numpy(), backend="torch", method="L-BFGS-B", bounds=(l[0] + eps, u[0]), From 2b0631a13bc9cbe2486000158696b93c49be9361 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 21 Nov 2024 16:56:18 +0100 Subject: [PATCH 17/39] fix link_fun_rate i.e. 
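For reference, the pointwise confidence band computed in sampled_lcb_ucb from a stack of sampled paths reduces to per-column quantiles; a standalone sketch with synthetic samples:

    import torch

    torch.manual_seed(0)
    # pretend these are 200 sampled rate functions evaluated at 50 test locations
    paths = torch.randn(200, 50, dtype=torch.float64).abs()

    delta = 0.1
    lcb = torch.quantile(paths, delta, dim=0)        # pointwise 10% quantile across samples
    ucb = torch.quantile(paths, 1 - delta, dim=0)    # pointwise 90% quantile
    print(lcb.shape, ucb.shape)                      # both torch.Size([50])
    assert torch.all(lcb <= ucb)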
implement integral approximation --- stpy/point_processes/link_fun_rate_estimator.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py index c5a5b69..538e2f2 100644 --- a/stpy/point_processes/link_fun_rate_estimator.py +++ b/stpy/point_processes/link_fun_rate_estimator.py @@ -1,3 +1,4 @@ +from typing import List import numpy as np import torch import scipy @@ -215,6 +216,9 @@ def load_data(self, data): for sample in data: (S, obs, dt) = sample self.sumLambda += self.product_integral(S) * dt + else: + self.S = data[0][0] + assert isinstance(self.S, BorelSet) def add_data_point(self, new_data): super().add_data_point(new_data, times=False) @@ -405,14 +409,16 @@ def mean_rate(self, S, n=128): class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator): def penalized_likelihood(self, threads=4): - weights = self.weights.numpy() - nodes = self.nodes.numpy() + # Get node function values and weights for Gauss-Legendre quadrature + weights, nodes = self.S.return_legendre_discretization(n=50) + weights = np.array(weights) + vals = np.array(self.packing.embed_internal(nodes)) if self.observations is not None: observations = self.observations.numpy() loss = lambda theta: float( np.sum(observations @ theta) - + np.sum(weights * np.exp(-theta @ nodes.T)) + + np.sum(weights * np.exp(-theta @ vals)) + self.s * np.sum(theta**2) ) else: From 9470fe32ec2a3f830c6d2a6631152802f1090ed8 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 21 Nov 2024 16:56:49 +0100 Subject: [PATCH 18/39] precompute even without using bins --- stpy/point_processes/poisson_rate_estimator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index 2792e06..c065f4f 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -213,13 +213,11 @@ def __init__( self.global_dt = 0.0 self.anchor_points_emb = self.packing.embed(self.anchor_points) - if feedback_type == "count-record": + if feedback_type == "count-record" and self.dual: print("Precomputing phis.") for index_set, set in enumerate(self.basic_sets): self.varphis[index_set, :] = self.packing.integral(set) self.variances[index_set] = set.volume() * self.B - else: - pass print("Precomputation finished.") @@ -581,7 +579,7 @@ def prox(x): np.eye(self.get_m()), x.numpy().reshape(-1), C=Gamma_half.numpy(), - b=l.numpy(), + b=np.array(l), factorized=True, ) return torch.from_numpy(res[0]).view(-1, 1) From 918b2cd2236532744235e479d57c9a32cd56ed87 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 21 Nov 2024 16:58:23 +0100 Subject: [PATCH 19/39] slight doc update --- stpy/point_processes/rate_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index 356d537..7f3dc1f 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -17,7 +17,7 @@ def get_min_max(self): return (np.min(volumes), np.max(volumes)) def load_data(self, data: List, times=True): - r"""Load the data and save $\Phi(x)$ into `self.observations`, $n(A_i)$ in + r"""Load the data and save $\phi(x)$ into `self.observations`, $n(A_i)$ in `self.counts` and $\int_{A_i} \phi_j(x) dx$ into `self.phis` From 5ee811a9067ee785304970130cebbfcdac862b43 Mon Sep 17 00:00:00 2001 From: 
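The exponential-link estimator above approximates the window integral with a Gauss-Legendre discretization. In one dimension the ingredients look like this (standalone numpy sketch of the same construction, not the stpy call):

    import numpy as np

    # Gauss-Legendre nodes/weights on [-1, 1], rescaled to [a, b]
    a, b = 0.0, 2.0
    nodes, weights = np.polynomial.legendre.leggauss(50)
    nodes = 0.5 * (b - a) * nodes + 0.5 * (b + a)
    weights = 0.5 * (b - a) * weights

    # toy exponential-link rate: lambda(x) = exp(-theta^T phi(x)) with phi(x) = (1, x)
    theta = np.array([0.3, 0.8])
    phi = np.stack([np.ones_like(nodes), nodes])          # shape (2, 50)
    integral = np.sum(weights * np.exp(-theta @ phi))

    # closed form of int_0^2 exp(-0.3 - 0.8 x) dx for comparison
    exact = np.exp(-0.3) * (1 - np.exp(-1.6)) / 0.8
    print(integral, exact)    # agree to machine precision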
peibensteine Date: Thu, 21 Nov 2024 16:58:33 +0100 Subject: [PATCH 20/39] add kernel integral for rbf --- .../squared_exponential_kernel.py | 91 ++++++++++++++++++- stpy/kernels.py | 7 ++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/stpy/kernel_functions/squared_exponential_kernel.py b/stpy/kernel_functions/squared_exponential_kernel.py index cb5ae7e..f0d8db0 100644 --- a/stpy/kernel_functions/squared_exponential_kernel.py +++ b/stpy/kernel_functions/squared_exponential_kernel.py @@ -1,4 +1,3 @@ -import numpy as np import torch from stpy.kernel_functions.kernel_params import KernelParams @@ -38,3 +37,93 @@ def squared_exponential_kernel_diag(a, b, **kwargs): arg = (-0.5 / (p.gamma * p.gamma)) * sqdist res = torch.exp(arg) return p.kappa * res + + +def squared_exponential_integral(a_x, a_y, b_x, b_y, **kwargs): + """ + Returns a function that computes g(x) for multiple 2D points x given lower and upper bounds. + + Parameters: + - a_x: torch.Tensor, lower bounds in x-dimension (shape: [N]) + - a_y: torch.Tensor, lower bounds in y-dimension (shape: [N]) + - b_x: torch.Tensor, upper bounds in x-dimension (shape: [N]) + - b_y: torch.Tensor, upper bounds in y-dimension (shape: [N]) + - kwargs: should give attributes gamma (float) and kappa (float) + + Returns: + - A function `g(x)` that computes g(x) for input x (torch.Tensor of shape [M, 2]). + """ + p = KernelParams(kwargs) + p.assert_existence(["gamma", "kappa"]) + gamma = p.gamma + kappa = p.kappa + + def g(x): + """ + Compute the integral g(x) for multiple 2D points x. + + Parameters: + - x: torch.Tensor, input points of shape [M, 2] where each row is a 2D point. + + Returns: + - torch.Tensor of shape [len(a_x), len(x)], where result[i][j] is g(x_j) for bounds from a_x[i], a_y[i], b_x[i], b_y[i]. 
+ """ + x1, x2 = x[:, 0], x[:, 1] # Extract x1 and x2 from input tensor x + a_x_broadcast = a_x.unsqueeze(1) # Shape [N, 1] + a_y_broadcast = a_y.unsqueeze(1) # Shape [N, 1] + b_x_broadcast = b_x.unsqueeze(1) # Shape [N, 1] + b_y_broadcast = b_y.unsqueeze(1) # Shape [N, 1] + + # Compute the error function terms + erf_x1_a = torch.erf((a_x_broadcast - x1) * torch.sqrt(torch.tensor(gamma))) + erf_x1_b = torch.erf((b_x_broadcast - x1) * torch.sqrt(torch.tensor(gamma))) + erf_x2_a = torch.erf((a_y_broadcast - x2) * torch.sqrt(torch.tensor(gamma))) + erf_x2_b = torch.erf((b_y_broadcast - x2) * torch.sqrt(torch.tensor(gamma))) + + # Compute the product of error function differences + integral_values = (erf_x1_a - erf_x1_b) * (erf_x2_a - erf_x2_b) + + # Scale by constants + result = (torch.pi * kappa / (4 * gamma)) * integral_values + + return result + + return g + + +if __name__ == "__main__": + # Test squared_exponential_integral + a_x = torch.tensor([-float("inf"), -float("inf")]) + a_y = torch.tensor([-float("inf"), -float("inf")]) + b_x = torch.tensor([float("inf"), float("inf")]) + b_y = torch.tensor([float("inf"), float("inf")]) + + gamma = 1.0 + kappa = 1.0 + g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=gamma, kappa=kappa) + x = torch.tensor([[87, 0], [1123, 11]]) + assert torch.allclose(g(x), torch.tensor([torch.pi, torch.pi])) + + # Test with new bounds x in [0,1] and y in [0,1] + a_x = torch.tensor([0.0]) + a_y = torch.tensor([0.0]) + b_x = torch.tensor([1.0]) + b_y = torch.tensor([1.0]) + + g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=10e-6, kappa=kappa) + x = torch.tensor([[0.5, 0.5], [0.25, 0.75]]) + assert torch.allclose(g(x), torch.tensor([1.0, 1.0])) + + a_x = torch.tensor([0.0, 1.0]) + a_y = torch.tensor([0.0, 2.0]) + b_x = torch.tensor([1.0, 3.0]) + b_y = torch.tensor([1.0, 4.0]) + + g = squared_exponential_integral(a_x, a_y, b_x, b_y, gamma=0.5, kappa=3.0) + x = torch.tensor([[0.5, 0.5], [2.0, 3.0]]) + result = g(x) + assert torch.allclose( + result, torch.tensor([[2.7639, 0.0548], [0.3794, 8.7851]]), atol=1e-4 + ) + + torch.ones(()) diff --git a/stpy/kernels.py b/stpy/kernels.py index 66fbd54..20d910d 100755 --- a/stpy/kernels.py +++ b/stpy/kernels.py @@ -6,6 +6,7 @@ from scipy.special import kv from sklearn.metrics.pairwise import check_pairwise_arrays, manhattan_distances from stpy.kernel_functions.squared_exponential_kernel import ( + squared_exponential_integral, squared_exponential_kernel_diag, ) @@ -192,6 +193,12 @@ def get_param_refs(self): def get_kernel(self): return self.kernel + def integral(self, a_x, a_y, b_x, b_y): + if self.optkernel == "squared_exponential": + return squared_exponential_integral(a_x, a_y, b_x, b_y, **self.params) + else: + raise NotImplementedError() + def get_kernel_internal(self, diag=False): self.params = { From 9cfacaf849f3825200b9f914c3b4b8d749fc09c2 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:06:53 +0100 Subject: [PATCH 21/39] make numpy torch conversions device-safe --- stpy/borel_set.py | 36 ++-- stpy/continuous_processes/gauss_procc.py | 8 +- stpy/continuous_processes/mkl_features.py | 6 +- stpy/continuous_processes/nystrom_fea.py | 77 ++++---- stpy/continuous_processes/primal_mkl.py | 8 +- .../truncated_kernelized_features.py | 2 +- stpy/embeddings/bump_bases.py | 6 +- stpy/embeddings/embedding.py | 2 +- stpy/embeddings/onehot_embedding.py | 2 +- stpy/embeddings/optimal_positive_basis.py | 17 +- stpy/embeddings/polynomial_embedding.py | 2 +- stpy/embeddings/positive_embedding.py | 
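The closed form implemented above integrates kappa * exp(-gamma * ||x - y||^2) over an axis-aligned box, so gamma here plays the role of an inverse squared length-scale. A standalone check of that formula against numerical quadrature, using the same parameters as the last test above:

    import numpy as np
    from scipy.integrate import dblquad
    from scipy.special import erf

    gamma, kappa = 0.5, 3.0
    x1, x2 = 0.5, 0.5                      # query point
    ax, bx, ay, by = 0.0, 1.0, 0.0, 1.0    # integration box

    closed = (np.pi * kappa / (4 * gamma)) \
        * (erf((ax - x1) * np.sqrt(gamma)) - erf((bx - x1) * np.sqrt(gamma))) \
        * (erf((ay - x2) * np.sqrt(gamma)) - erf((by - x2) * np.sqrt(gamma)))

    numeric, _ = dblquad(
        lambda y2, y1: kappa * np.exp(-gamma * ((y1 - x1) ** 2 + (y2 - x2) ** 2)),
        ax, bx, lambda _: ay, lambda _: by,
    )
    print(closed, numeric)   # both ~2.7639, matching the expected value in the test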
39 ++-- stpy/helpers/abitrary_sampling.py | 6 +- stpy/helpers/helper.py | 2 +- stpy/kernels.py | 2 +- stpy/legacy/integral_kernels.py | 4 +- stpy/optim/custom_optimizers.py | 4 +- .../binomial/binomial_process_estimator.py | 16 +- .../link_fun_rate_estimator.py | 6 +- stpy/point_processes/loglinear_estimator.py | 2 +- .../point_processes/mbr_positive_estimator.py | 2 +- stpy/point_processes/poisson.py | 6 +- .../poisson/link_fun_rate_estimator.py | 6 +- .../poisson/loglinear_estimator.py | 2 +- .../poisson/mbr_positive_estimator.py | 2 +- stpy/point_processes/poisson/poisson.py | 4 +- .../point_processes/poisson_rate_estimator.py | 174 ++++++++++-------- .../positive_basis_estimator.py | 8 +- stpy/point_processes/rate_estimator.py | 8 +- .../point_processes/seasonal_point_process.py | 4 +- stpy/probability/likelihood.py | 2 +- stpy/test_functions/benchmarks.py | 6 +- tests/SRI_test.py | 4 +- .../domain_non_stationarity.py | 4 +- .../test_estimators/qff_nonstationary.py | 4 +- .../test_regularized_dictionary_l2.py | 2 +- tests/fourier-features-multidimensional.py | 2 +- tests/hessian-estimation-test.py | 10 +- 38 files changed, 267 insertions(+), 230 deletions(-) diff --git a/stpy/borel_set.py b/stpy/borel_set.py index 36c4fe5..2440e4f 100644 --- a/stpy/borel_set.py +++ b/stpy/borel_set.py @@ -37,7 +37,7 @@ def perimeter(self): def uniform_sample(self, n): sample = torch.zeros(n, self.d).double() for i in range(self.d): - sample_i = torch.from_numpy( + sample_i = torch.tensor( np.random.uniform(self.bounds[i, 0], self.bounds[i, 1], n) ) sample[:, i] = sample_i @@ -55,20 +55,22 @@ def return_legendre_discretization(self, n): nodes = cartesian(nodes_arr) weights = cartesian(weights_arr) - return torch.prod(torch.from_numpy(weights), dim=1), torch.from_numpy(nodes) + return torch.prod(torch.tensor(weights), dim=1), torch.tensor(nodes) def return_discretization(self, n, offsets=None): dis = [] for i in range(self.d): if offsets is None: - x = np.linspace(self.bounds[i, 0], self.bounds[i, 1], n) + x = np.linspace(self.bounds[i, 0].cpu(), self.bounds[i, 1].cpu(), n) else: x = np.linspace( - self.bounds[i, 0] - offsets[i], self.bounds[i, 1] + offsets[i], n + self.bounds[i, 0].cpu() - offsets[i].cpu(), + self.bounds[i, 1].cpu() + offsets[i].cpu(), + n, ) dis.append(x) r = cartesian(dis) - r = torch.from_numpy(r) + r = torch.tensor(r) return r def inside(self, set): @@ -122,7 +124,7 @@ def return_discretization(self, n): x = np.linspace(self.center - self.radius, self.center + self.radius, n) dis.append(x) r = cartesian(dis) - r = torch.from_numpy(r) + r = torch.tensor(r) return r elif self.d == 2: @@ -152,7 +154,7 @@ def return_discretization(self, n): # # points = np.concatenate((points,self.center.view(-1,self.d).numpy())) - return torch.from_numpy(points) + return torch.tensor(points) def return_legendre_discretization(self, n): if self.d == 2: @@ -167,7 +169,7 @@ def return_legendre_discretization(self, n): points[:, 0] += float(self.center[0]) points[:, 1] += float(self.center[1]) weights = np.outer(w, np.sin(mu * np.pi / (n + 1)) ** 2).flatten() / (n + 1) - return torch.from_numpy(weights), torch.from_numpy(points) + return torch.tensor(weights), torch.tensor(points) else: raise AssertionError("Wrong type of set considered.") @@ -222,9 +224,9 @@ class HierarchicalBorelSets: def __init__(self, d, interval, levels): if d == 1: - self.top_node = Node(d, torch.Tensor([interval]), None) + self.top_node = Node(d, torch.tensor([interval]), None) elif d == 2: - self.top_node = Node(d, 
torch.Tensor(interval), None) + self.top_node = Node(d, torch.tensor(interval), None) self.Sets = [self.top_node] self.levels = levels @@ -265,8 +267,8 @@ def construct_1d(self, interval, levels, S, parent): a, b = interval c = (a + b) / 2.0 - S_1 = Node(1, torch.Tensor([[a, c]]), parent) - S_2 = Node(1, torch.Tensor([[c, b]]), parent) + S_1 = Node(1, torch.tensor([[a, c]]), parent) + S_2 = Node(1, torch.tensor([[c, b]]), parent) parent.left = S_1 parent.right = S_2 @@ -289,10 +291,10 @@ def construct_2d(self, interval, levels, S, parent): midx = xa + (xb - xa) / 2.0 midy = ya + (yb - ya) / 2.0 - S1 = Node(2, torch.Tensor([[xa, midx], [ya, midy]]), parent) - S2 = Node(2, torch.Tensor([[xa, midx], [midy, yb]]), parent) - S3 = Node(2, torch.Tensor([[midx, xb], [ya, midy]]), parent) - S4 = Node(2, torch.Tensor([[midx, xb], [midy, yb]]), parent) + S1 = Node(2, torch.tensor([[xa, midx], [ya, midy]]), parent) + S2 = Node(2, torch.tensor([[xa, midx], [midy, yb]]), parent) + S3 = Node(2, torch.tensor([[midx, xb], [ya, midy]]), parent) + S4 = Node(2, torch.tensor([[midx, xb], [midy, yb]]), parent) parent.children = [S1, S2, S3, S4] @@ -304,7 +306,7 @@ def construct_2d(self, interval, levels, S, parent): if __name__ == "__main__": - center = torch.Tensor([0.5, 0.5]).double() + center = torch.tensor([0.5, 0.5]).double() radius = 0.1 d = 2 B = BallSet(d, center, radius) diff --git a/stpy/continuous_processes/gauss_procc.py b/stpy/continuous_processes/gauss_procc.py index 88989fc..e8882e1 100755 --- a/stpy/continuous_processes/gauss_procc.py +++ b/stpy/continuous_processes/gauss_procc.py @@ -1258,8 +1258,8 @@ def ucb_optimize(self, beta, multistart=25, lcb=False): sigma = lambda x: self.mean_std(x, reuse=True)[1][0][0] ucb = lambda x: torch.dot( - torch.Tensor([1.0, np.sqrt(beta)]), - torch.Tensor( + torch.tensor([1.0, np.sqrt(beta)]), + torch.tensor( [ self.mean_std(x, reuse=True)[0][0][0], self.mean_std(x, reuse=True)[1][0][0], @@ -1267,8 +1267,8 @@ def ucb_optimize(self, beta, multistart=25, lcb=False): ), ) lcb = lambda x: torch.dot( - torch.Tensor([1.0, np.sqrt(beta)]), - torch.Tensor( + torch.tensor([1.0, np.sqrt(beta)]), + torch.tensor( [ self.mean_std(x, reuse=True)[0][0][0], -self.mean_std(x, reuse=True)[1][0][0], diff --git a/stpy/continuous_processes/mkl_features.py b/stpy/continuous_processes/mkl_features.py index 04ccda7..42b6868 100755 --- a/stpy/continuous_processes/mkl_features.py +++ b/stpy/continuous_processes/mkl_features.py @@ -45,7 +45,7 @@ def fit_gp(self, x, y): # def mean_vector(self): # theta = torch.zeros(size = (self.total_embed_dim())) - # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() + # dims_index = torch.cumsum(torch.tensor([0] + self.get_emebed_dims()),dim = 0).int() # for index, emb in enumerate(self.embeddings): # theta_small = emb.sample_theta() # theta[dims_index[index]:dims_index[index + 1]] = theta_small.view(-1) @@ -78,7 +78,7 @@ def embed(self, xtest): n = xtest.size()[0] Phi = torch.zeros(size=(n, int(self.total_embed_dim())), dtype=torch.float64) dims_index = torch.cumsum( - torch.Tensor([0] + self.get_emebed_dims()), dim=0 + torch.tensor([0] + self.get_emebed_dims()), dim=0 ).int() for index, embedding in enumerate(self.embeddings): @@ -129,7 +129,7 @@ def acquisiton_function(self, C, Phi, candidates): values.append(self.evaluate_design(C, newPhi)[0]) ranks.append(self.evaluate_design(C, newPhi)[1]) - return [torch.Tensor(values), torch.Tensor(ranks)] + return [torch.tensor(values), torch.tensor(ranks)] if __name__ == 
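The recurring pattern in this commit, calling .cpu().numpy() before handing tensors to numpy/scipy/matplotlib and preferring torch.tensor(...) over torch.from_numpy when the result may need to live on another device, in isolation:

    import numpy as np
    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    t = torch.rand(3, dtype=torch.float64, device=device)

    # torch -> numpy: numpy only sees CPU memory, so detach and move first
    arr = t.detach().cpu().numpy()

    # numpy -> torch: torch.from_numpy returns a CPU tensor sharing the array's memory,
    # while torch.tensor copies the data and can be placed directly on the target device
    t_shared = torch.from_numpy(arr)
    t_copy = torch.tensor(arr, device=device)
    print(arr, t_shared.device, t_copy.device)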
"__main__": diff --git a/stpy/continuous_processes/nystrom_fea.py b/stpy/continuous_processes/nystrom_fea.py index 03c06e6..dec4f0a 100755 --- a/stpy/continuous_processes/nystrom_fea.py +++ b/stpy/continuous_processes/nystrom_fea.py @@ -149,8 +149,8 @@ def fit_gp(self, x, y, eps=1e-14): ysample = GP.sample(x, size=self.samples) ** 2 X = ysample model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12) - W = torch.from_numpy(model.fit_transform(X)) - H = torch.from_numpy(model.components_) + W = torch.tensor(model.fit_transform(X.cpu())) + H = torch.tensor(model.components_) l = torch.norm(W, dim=1) l = 1.0 / l @@ -159,22 +159,25 @@ def fit_gp(self, x, y, eps=1e-14): for j in range(self.ms): fs.append( interp1d( - x.view(-1).numpy(), (W.T @ torch.diag(l))[j, :].numpy() + x.view(-1).cpu().numpy(), + (W.T @ torch.diag(l))[j, :].cpu().numpy(), ) ) self.embed = lambda q: torch.cat( - [torch.from_numpy(fs[j](q)).view(-1, 1) for j in range(self.ms)], + [torch.tensor(fs[j](q)).view(-1, 1) for j in range(self.ms)], dim=1, ) elif x.size()[1] == 2: fs = [] for j in range(self.ms): - W_j = (W.T @ torch.diag(l))[j, :].numpy() - fs.append(LinearNDInterpolator(x, W_j)) + W_j = (W.T @ torch.diag(l))[j, :].cpu().numpy() + fs.append(LinearNDInterpolator(x.cpu().numpy(), W_j)) self.embed = lambda q: torch.cat( [ - torch.from_numpy(fs[j](q[:, 0], q[:, 1])).view(-1, 1) + torch.tensor( + fs[j](q[:, 0].cpu().numpy(), q[:, 1].cpu().numpy()) + ).view(-1, 1) for j in range(self.ms) ], dim=1, @@ -182,9 +185,9 @@ def fit_gp(self, x, y, eps=1e-14): # elif x.size()[1] == 2: # fs = [] # for j in range(self.ms): - # W_j = (W.T @ torch.diag(l))[j, :].numpy() + # W_j = (W.T @ torch.diag(l))[j, :].cpu().numpy() # fs.append(Rbf(x[:,0],x[:,1], W_j)) - # self.embed = lambda q: torch.cat([torch.from_numpy(fs[j](q[:,0],q[:,1])).view(-1, 1) for j in range(self.ms)], + # self.embed = lambda q: torch.cat([torch.tensor(fs[j](q[:,0],q[:,1])).view(-1, 1) for j in range(self.ms)], # dim=1) self.C = [] @@ -193,7 +196,7 @@ def fit_gp(self, x, y, eps=1e-14): K = self.kernel( x, x ) # + self.s * self.s * torch.eye(self.N, dtype=torch.float64) - Khalf = torch.from_numpy(np.real(scipy.linalg.sqrtm(K.numpy()))) + Khalf = torch.tensor(np.real(scipy.linalg.sqrtm(K.cpu().numpy()))) Khalfinv = torch.pinverse(Khalf) self.embed = lambda q: torch.t(torch.mm(Khalfinv, self.kernel(q, self.x))) else: @@ -288,24 +291,32 @@ def visualize(self, xtest, f_true=None, points=True, show=True): plt.figure(figsize=(15, 7)) plt.clf() - plt.plot(self.x.numpy(), self.y.numpy(), "r+", ms=10, marker="o") plt.plot( - self.x[self.C, :].numpy(), - self.y[self.C, :].numpy(), + self.x.cpu().numpy(), self.y.cpu().numpy(), "r+", ms=10, marker="o" + ) + plt.plot( + self.x[self.C, :].cpu().numpy(), + self.y[self.C, :].cpu().numpy(), "g+", ms=10, marker="o", ) - # plt.plot(xtest.numpy(), self.sample(xtest, size=2).numpy(), 'k--', lw=2, label="sample") + # plt.plot(xtest.cpu().numpy(), self.sample(xtest, size=2).cpu().numpy(), 'k--', lw=2, label="sample") plt.fill_between( - xtest.numpy().flat, - (mu - 2 * std).numpy().flat, - (mu + 2 * std).numpy().flat, + xtest.cpu().numpy().flat, + (mu - 2 * std).cpu().numpy().flat, + (mu + 2 * std).cpu().numpy().flat, color="#dddddd", ) if f_true is not None: - plt.plot(xtest.numpy(), f_true(xtest).numpy(), "b-", lw=2) - plt.plot(xtest.numpy(), mu.numpy(), "r-", lw=2, label="posterior mean") + plt.plot(xtest.cpu().numpy(), f_true(xtest).cpu().numpy(), "b-", lw=2) + plt.plot( + xtest.cpu().numpy(), + mu.cpu().numpy(), + "r-", + lw=2, + 
label="posterior mean", + ) plt.title("Posterior mean prediction plus 2 st.deviation") plt.legend() if show == True: @@ -317,27 +328,27 @@ def visualize(self, xtest, f_true=None, points=True, show=True): plt.figure(figsize=(15, 7)) plt.clf() ax = plt.axes(projection="3d") - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() + xx = xtest[:, 0].cpu().numpy() + yy = xtest[:, 1].cpu().numpy() grid_x, grid_y = np.mgrid[ min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j ] grid_z_mu = griddata( - (xx, yy), mu[:, 0].numpy(), (grid_x, grid_y), method="linear" + (xx, yy), mu[:, 0].cpu().numpy(), (grid_x, grid_y), method="linear" ) if f_true is not None: grid_z = griddata( (xx, yy), - f_true(xtest)[:, 0].numpy(), + f_true(xtest)[:, 0].cpu().numpy(), (grid_x, grid_y), method="linear", ) ax.plot_surface(grid_x, grid_y, grid_z, color="b", alpha=0.4) if points == True: ax.scatter( - self.x[:, 0].numpy(), - self.x[:, 1].numpy(), - self.y[:, 0].numpy(), + self.x[:, 0].cpu().numpy(), + self.x[:, 1].cpu().numpy(), + self.y[:, 0].cpu().numpy(), c="r", s=100, marker="o", @@ -363,13 +374,11 @@ def visualize(self, xtest, f_true=None, points=True, show=True): # number of intial points N = 100 # smoothness - gamma = torch.from_numpy(np.array([0.4, 0.4])) + gamma = torch.tensor(np.array([0.4, 0.4])) # test problem - xtest = torch.from_numpy(interval(n, d)) - x = torch.from_numpy( - np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d)) - ) + xtest = torch.tensor(interval(n, d)) + x = torch.tensor(np.random.uniform(-L_infinity_ball, L_infinity_ball, size=(N, d))) f_no_noise = lambda q: torch.sin(torch.sum(q * 4, dim=1)).view(-1, 1) # f_no_noise = lambda q: torch.sin((q[:,0] * 4)).view(-1, 1) @@ -391,15 +400,15 @@ def visualize(self, xtest, f_true=None, points=True, show=True): GP0.fit_gp(x, y) GP0.visualize(xtest, f_true=f_no_noise) - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="uniform") + GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="uniform") GP.fit_gp(x, y) GP.visualize(xtest, f_true=f_no_noise) - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="online_leverage") + GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="online_leverage") GP.fit_gp(x, y) GP.visualize(xtest, f_true=f_no_noise) - GP = NystromFeatures(kernel, m=torch.Tensor([30]), s=s, approx="svd") + GP = NystromFeatures(kernel, m=torch.tensor([30]), s=s, approx="svd") GP.fit_gp(x, y) print(GP0.K, GP.outer_kernel()) GP.visualize(xtest, f_true=f_no_noise) diff --git a/stpy/continuous_processes/primal_mkl.py b/stpy/continuous_processes/primal_mkl.py index 7ee2f66..84965c0 100755 --- a/stpy/continuous_processes/primal_mkl.py +++ b/stpy/continuous_processes/primal_mkl.py @@ -19,7 +19,7 @@ def total_embed_dim(self): self.dims = [] for embedding in self.embeddings: self.dims.append(embedding.get_basis_size()) - sum = torch.sum(torch.Tensor(self.dims)) + sum = torch.sum(torch.tensor(self.dims)) return sum def get_emebed_dims(self): @@ -43,7 +43,7 @@ def get_emebed_dims(self): # self.y = y # (self.n, self.d) = self.x.size() # self.total_m = self.total_embed_dim() - # dims_index = torch.cumsum(torch.Tensor([0] + self.get_emebed_dims()),dim = 0).int() + # dims_index = torch.cumsum(torch.tensor([0] + self.get_emebed_dims()),dim = 0).int() # self.w = [torch.ones(size = (i,1), dtype = torch.float64,requires_grad = True) for i in self.get_emebed_dims()] # # self.theta = torch.ones(size = (self.no_models,1), dtype = torch.float64,requires_grad = True) @@ -56,7 +56,7 @@ def fit_gp(self, x, 
y): (self.n, self.d) = self.x.size() self.total_m = self.total_embed_dim() dims_index = torch.cumsum( - torch.Tensor([0] + self.get_emebed_dims()), dim=0 + torch.tensor([0] + self.get_emebed_dims()), dim=0 ).int() self.w = [ @@ -140,7 +140,7 @@ def cost(w): def mean_var(self, xtest): n = xtest.size()[0] dims_index = torch.cumsum( - torch.Tensor([0] + self.get_emebed_dims()), dim=0 + torch.tensor([0] + self.get_emebed_dims()), dim=0 ).int() Phi = torch.zeros(size=(n, int(self.total_m)), dtype=torch.float64) diff --git a/stpy/continuous_processes/truncated_kernelized_features.py b/stpy/continuous_processes/truncated_kernelized_features.py index b6f84b1..01fc298 100644 --- a/stpy/continuous_processes/truncated_kernelized_features.py +++ b/stpy/continuous_processes/truncated_kernelized_features.py @@ -69,7 +69,7 @@ def add_points(self, d): if self.x is not None: self.x = torch.cat((self.x, x), dim=0) self.y = torch.cat((self.y, y), dim=0) - new_alpha = torch.Tensor([self.alpha_score(self.x.size()[0])]).view(1, 1) + new_alpha = torch.tensor([self.alpha_score(self.x.size()[0])]).view(1, 1) self.alphas = torch.cat((self.alphas, new_alpha), dim=0) else: self.x = x diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py index 48b08a5..505d42d 100644 --- a/stpy/embeddings/bump_bases.py +++ b/stpy/embeddings/bump_bases.py @@ -105,7 +105,7 @@ def integral(self, S): tk = self.interval[0] + (j // self.m) * dm # triangle center point - # center_point = torch.Tensor( [tj,tk]).view(-1,1) + # center_point = torch.tensor( [tj,tk]).view(-1,1) vol = self.integrate_1d(xa.numpy(), xb.numpy(), tk, dm) vol2 = self.integrate_1d(ya.numpy(), yb.numpy(), tj, dm) psi[j] = float(vol * vol2) @@ -198,7 +198,7 @@ def hierarchical_mask(self): for i in range(int(np.log2(self.m))): for j in range(2**i): mask.append(i + 1) - return torch.Tensor(mask) + return torch.tensor(mask) def product_integral(self): raise NotImplementedError("Not implemented.") @@ -304,7 +304,7 @@ def __init__(self, *args, samples=300, **kwargs): super().__init__(*args, **kwargs) self.samples = np.maximum(samples, self.m) - B = BorelSet(1, torch.Tensor([[self.interval[0], self.interval[1]]]).double()) + B = BorelSet(1, torch.tensor([[self.interval[0], self.interval[1]]]).double()) x = B.return_discretization(256) y = x[:, 0].view(-1, 1) * 0 diff --git a/stpy/embeddings/embedding.py b/stpy/embeddings/embedding.py index 021dfa7..3d2ac08 100755 --- a/stpy/embeddings/embedding.py +++ b/stpy/embeddings/embedding.py @@ -862,7 +862,7 @@ def __init__(self, embeddings, ms, groups=None, scaling=None, additive=True): self.groups = [[i] for i in range(len(self.emebeddings))] try: - self.ms = torch.Tensor(ms) + self.ms = torch.tensor(ms) except: self.ms = ms diff --git a/stpy/embeddings/onehot_embedding.py b/stpy/embeddings/onehot_embedding.py index 944a532..1b32a63 100644 --- a/stpy/embeddings/onehot_embedding.py +++ b/stpy/embeddings/onehot_embedding.py @@ -35,5 +35,5 @@ def embed(self, x): if __name__ == "__main__": emb = OnehotEmbedding(20, 2) - x = torch.Tensor([[2, 3], [4, 5], [10, 19]]) + x = torch.tensor([[2, 3], [4, 5], [10, 19]]) print(emb.embed(x)) diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index 9ccc3c6..096c4e5 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -20,7 +20,7 @@ def __init__( B = BorelSet( self.d, - torch.Tensor( + torch.tensor( [[self.interval[0], self.interval[1]] for _ in range(self.d)] ).double(), ) @@ 
-121,12 +121,13 @@ def cov(self, inverse=False): Z = self.embed_internal(t) M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) - self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) + self.M = torch.tensor(np.real(scipy.linalg.sqrtm(M.cpu().numpy()))) - self.Gamma_half = torch.from_numpy( + self.Gamma_half = torch.tensor( np.real( scipy.linalg.sqrtm( - self.Gamma.numpy() + (self.s**2) * np.eye(self.Gamma.size()[0]) + self.Gamma.cpu().numpy() + + (self.s**2) * np.eye(self.Gamma.size()[0]) ) ) ) @@ -164,9 +165,9 @@ def cov(self, inverse=False): ) GP = GaussianProcess(d=d, s=s) - xtest = torch.from_numpy(interval(n, d)) + xtest = torch.tensor(interval(n, d)) - x = torch.from_numpy(np.random.uniform(-1, 1, size=(N, d))) + x = torch.tensor(np.random.uniform(-1, 1, size=(N, d))) F_true = lambda x: torch.sum(torch.sin(x) ** 2 - 0.1, dim=1).view(-1, 1) F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() @@ -177,8 +178,8 @@ def cov(self, inverse=False): fig, axs = plt.subplots(msqrt, msqrt, figsize=(15, 7)) for i in range(m): f_i = Emb.basis_fun(xtest, i) ## basis function - xx = xtest[:, 0].numpy() - yy = xtest[:, 1].numpy() + xx = xtest[:, 0].cpu().numpy() + yy = xtest[:, 1].cpu().numpy() ax = axs[int(i // msqrt), (i % msqrt)] grid_x, grid_y = np.mgrid[min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j] grid_z_f = griddata( diff --git a/stpy/embeddings/polynomial_embedding.py b/stpy/embeddings/polynomial_embedding.py index 2634641..07be852 100755 --- a/stpy/embeddings/polynomial_embedding.py +++ b/stpy/embeddings/polynomial_embedding.py @@ -85,7 +85,7 @@ def integral(self, S): for i in range(self.m): Fi = lambda x: self.embed(x).view(-1)[i] integrand = lambda x, y: Fi( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() val, status = integrate.dblquad( integrand, diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index 635af03..b19f208 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -60,7 +60,7 @@ def __init__( self.interval = (self.interval[0] - offset, self.interval[1] + offset) self.borel_set = BorelSet( - d=1, bounds=torch.Tensor([[self.interval[0], self.interval[1]]]).double() + d=1, bounds=torch.tensor([[self.interval[0], self.interval[1]]]).double() ) self.mu = None self.precomp = False @@ -84,9 +84,9 @@ def basis_fun(self, x, j): def get_constraints(self): s = self.m**self.d - l = torch.from_numpy(np.full(s, self.b)) - u = torch.from_numpy(np.full(s, self.B)) - Lambda = torch.from_numpy(np.identity(s)) + l = torch.tensor(np.full(s, self.b)) + u = torch.tensor(np.full(s, self.B)) + Lambda = torch.tensor(np.identity(s)) return (l, Lambda, u) def cov(self, inverse=False): @@ -104,20 +104,20 @@ def cov(self, inverse=False): if self.d == 1: t = t.view(-1, 1).double() elif self.d == 2: - t = torch.from_numpy(cartesian([t.numpy(), t.numpy()])).double() + t = torch.tensor(cartesian([t.cpu().numpy(), t.cpu().numpy()])).double() elif self.d == 3: - t = torch.from_numpy( - cartesian([t.numpy(), t.numpy(), t.numpy()]) + t = torch.tensor( + cartesian([t.cpu().numpy(), t.cpu().numpy(), t.cpu().numpy()]) ).double() if self.kernel is not None: self.Gamma = self.kernel(t, t) Z = self.embed_internal(t) M = torch.pinverse(Z.T @ Z + (self.s) * torch.eye(self.Gamma.size()[0])) - self.M = torch.from_numpy(np.real(scipy.linalg.sqrtm(M.numpy()))) - self.Gamma_half = torch.from_numpy( + self.M = 
torch.tensor(np.real(scipy.linalg.sqrtm(M.cpu().numpy()))) + self.Gamma_half = torch.tensor( np.real( scipy.linalg.sqrtm( - self.Gamma.numpy() + self.Gamma.cpu().numpy() + 1e-5 * (self.s**2) * np.eye(self.Gamma.size()[0]) ) ) @@ -156,8 +156,8 @@ def embed_internal(self, x): out = [] for i in range(n): out.append( - torch.from_numpy( - np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()) + torch.tensor( + np.kron(phi_1[i, :].cpu().numpy(), phi_2[i, :].cpu().numpy()), ).view(1, -1) ) out = torch.cat(out, dim=0) @@ -180,10 +180,12 @@ def embed_internal(self, x): out = [] for i in range(n): out.append( - torch.from_numpy( + torch.tensor( np.kron( phi_3[i, :], - np.kron(phi_1[i, :].numpy(), phi_2[i, :].numpy()), + np.kron( + phi_1[i, :].cpu().numpy(), phi_2[i, :].cpu().numpy() + ), ) ).view(1, -1) ) @@ -199,15 +201,16 @@ def fit(self, x, y, already_embeded=False): if already_embeded == False: Phi = self.embed(x).numpy() else: - Phi = x.numpy() + Phi = x.cpu().numpy() xi = cp.Variable(m) obj = cp.Minimize( - self.s**2 * cp.norm2(xi) + cp.sum_squares(Phi @ xi - y.numpy().reshape(-1)) + self.s**2 * cp.norm2(xi) + + cp.sum_squares(Phi @ xi - y.cpu().numpy().reshape(-1)) ) constraints = [] - Lambda = Lambda @ Gamma_half.numpy() + Lambda = Lambda @ Gamma_half.cpu().numpy() if not np.all(l == -np.inf): constraints.append(Lambda[l != -np.inf] @ xi >= l[l != -np.inf]) if not np.all(u == np.inf): @@ -225,7 +228,7 @@ def fit(self, x, y, already_embeded=False): raise ValueError("cannot compute the mode") mode = xi.value - self.mode = torch.from_numpy(mode).view(-1, 1) + self.mode = torch.tensor(mode).view(-1, 1) self.mu = self.mode return mode diff --git a/stpy/helpers/abitrary_sampling.py b/stpy/helpers/abitrary_sampling.py index 55eb46d..e14887d 100644 --- a/stpy/helpers/abitrary_sampling.py +++ b/stpy/helpers/abitrary_sampling.py @@ -182,8 +182,8 @@ def randomly_split_set_without_duplicates_general( # randomly permute indices inde = torch.from_numpy(np.random.permutation(np.arange(0, n, 1))) - cumsum_indices = torch.cumsum(torch.Tensor(sizes), 0).int() - cumsum_indices = torch.cat((torch.Tensor([0]), cumsum_indices)).int() + cumsum_indices = torch.cumsum(torch.tensor(sizes), 0).int() + cumsum_indices = torch.cat((torch.tensor([0]), cumsum_indices)).int() masks = [torch.zeros(N).bool() for _ in sizes] for j in range(len(sizes)): @@ -198,7 +198,7 @@ def randomly_split_set_without_duplicates_general( if __name__ == "__main__": - # x = torch.Tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2], + # x = torch.tensor([[2, 1, 1], [2, 1, 1], [2, 2, 2], # [3, 2, 2], [2, 1, 1], [4, 2, 1], # [4, 2, 4], [4,4,4], [1,2,2]]).double() # diff --git a/stpy/helpers/helper.py b/stpy/helpers/helper.py index 67a9e63..5371e61 100755 --- a/stpy/helpers/helper.py +++ b/stpy/helpers/helper.py @@ -566,6 +566,6 @@ def get_indices(xtest, x): if __name__ == "__main__": x = torch.arange(0, 9, 1).reshape(3, 3) - xtrial = torch.Tensor([[0, 1, 2], [6, 7, 8], [3, 4, 5]]) + xtrial = torch.tensor([[0, 1, 2], [6, 7, 8], [3, 4, 5]]) print(x) print(get_indices(x, xtrial)) diff --git a/stpy/kernels.py b/stpy/kernels.py index 20d910d..a151417 100755 --- a/stpy/kernels.py +++ b/stpy/kernels.py @@ -56,7 +56,7 @@ def __init__( self.ard_gamma = torch.ones(d).double() else: try: - self.ard_gamma = torch.Tensor([ard_gamma]).double() + self.ard_gamma = torch.tensor([ard_gamma]).double() except: self.ard_gamma = ard_gamma self.power = power diff --git a/stpy/legacy/integral_kernels.py b/stpy/legacy/integral_kernels.py index 004f32c..d56aa98 100755 --- 
a/stpy/legacy/integral_kernels.py +++ b/stpy/legacy/integral_kernels.py @@ -489,14 +489,14 @@ def bach_algortihm(self, size=1, candidates=100): def pca(self, kernel, size=1): if size > self.n: size = self.n - GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="svd") + GP = NystromFeatures(kernel, m=torch.tensor([size]), s=self.s, approx="svd") GP.fit_gp(self.x, self.y) return GP.outer_kernel() def nystrom(self, kernel, size=1): if size > self.n: size = self.n - GP = NystromFeatures(kernel, m=torch.Tensor([size]), s=self.s, approx="uniform") + GP = NystromFeatures(kernel, m=torch.tensor([size]), s=self.s, approx="uniform") GP.fit_gp(self.x, self.y) return GP.outer_kernel() diff --git a/stpy/optim/custom_optimizers.py b/stpy/optim/custom_optimizers.py index 043ae8e..2db66c2 100644 --- a/stpy/optim/custom_optimizers.py +++ b/stpy/optim/custom_optimizers.py @@ -271,7 +271,7 @@ def solve_mpi(Q, c, tau, verbose=True, up=None, low=None, xwarm=None): def newton_solve(f, x0, eps=1e-3, maxiter=100, verbose=False, grad=None): """ - >>> newton_solve(lambda x: x**2,torch.Tensor([2.0,1.0]).double().view(-1)) + >>> newton_solve(lambda x: x**2,torch.tensor([2.0,1.0]).double().view(-1)) tensor([0., 0.], dtype=torch.float64) """ lam = 1.0 @@ -339,5 +339,5 @@ def matrix_recovery_hermitian_trace_regression(X, b, eps=1e-5): if __name__ == "__main__": newton_solve( - lambda x: x**2, torch.Tensor([2.0, 1.0]).double().view(-1), verbose=True + lambda x: x**2, torch.tensor([2.0, 1.0]).double().view(-1), verbose=True ) diff --git a/stpy/point_processes/binomial/binomial_process_estimator.py b/stpy/point_processes/binomial/binomial_process_estimator.py index 48b8ec7..2096af3 100644 --- a/stpy/point_processes/binomial/binomial_process_estimator.py +++ b/stpy/point_processes/binomial/binomial_process_estimator.py @@ -83,12 +83,12 @@ def add_data_point(self, datapoint): phi = self.embed_set(S) if self.phis is not None: - self.counts = torch.cat((self.counts, torch.Tensor([count]))) - self.pool = torch.cat((self.pool, torch.Tensor([pool]))) + self.counts = torch.cat((self.counts, torch.tensor([count]))) + self.pool = torch.cat((self.pool, torch.tensor([pool]))) self.phis = torch.cat((self.phis, phi), dim=0) else: - self.counts = torch.Tensor([count]).double() - self.pool = torch.Tensor([pool]).double() + self.counts = torch.tensor([count]).double() + self.pool = torch.tensor([pool]).double() self.phis = phi def nabla(self, theta): @@ -214,7 +214,7 @@ def ucb(self, S, beta=8.0, delta=0.1): self.embed_set(S) @ self.rate + beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T ) - return torch.minimum(torch.Tensor([[1.0]]).double(), ucb) + return torch.minimum(torch.tensor([[1.0]]).double(), ucb) elif self.uncertainty == "ratio": phi = self.embed_set(S) @@ -251,7 +251,7 @@ def ucb(self, S, beta=8.0, delta=0.1): }, ) return torch.minimum( - torch.Tensor([[1.0]]).double(), torch.from_numpy(np.array(prob.value)) + torch.tensor([[1.0]]).double(), torch.from_numpy(np.array(prob.value)) ) def lcb(self, S, beta=8.0, delta=0.1): @@ -260,7 +260,7 @@ def lcb(self, S, beta=8.0, delta=0.1): self.embed_set(S) @ self.rate - beta * self.embed_set(S) @ self.invW @ self.embed_set(S).T ) - return torch.maximum(torch.Tensor([[0.0]]).double(), lcb) + return torch.maximum(torch.tensor([[0.0]]).double(), lcb) elif self.uncertainty == "ratio": phi = self.embed_set(S) @@ -297,7 +297,7 @@ def lcb(self, S, beta=8.0, delta=0.1): ) return torch.maximum( - torch.Tensor([[0.0]]).double(), torch.from_numpy(np.array(prob.value)) + 
torch.tensor([[0.0]]).double(), torch.from_numpy(np.array(prob.value)) ) def fit_gp(self, threads=4): diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py index 538e2f2..a188513 100644 --- a/stpy/point_processes/link_fun_rate_estimator.py +++ b/stpy/point_processes/link_fun_rate_estimator.py @@ -63,7 +63,7 @@ def product_integral(self, S): @ self.packing.embed(x).view(1, -1) ).view(-1) integrand = lambda x, y: F( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() val = quadvec2( @@ -117,7 +117,7 @@ def product_integral(self, S): * self.packing.embed(x).view(-1)[j] ) integrand = lambda x, y: F_ij( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() val, status = integrate.dblquad( integrand, @@ -466,7 +466,7 @@ def mean_rate(self, S, n=128): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 64 embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) diff --git a/stpy/point_processes/loglinear_estimator.py b/stpy/point_processes/loglinear_estimator.py index d971b08..179dc79 100644 --- a/stpy/point_processes/loglinear_estimator.py +++ b/stpy/point_processes/loglinear_estimator.py @@ -151,7 +151,7 @@ def mean_set(self, S, dt=1.0): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 128 k = KernelFunction(gamma=gamma) diff --git a/stpy/point_processes/mbr_positive_estimator.py b/stpy/point_processes/mbr_positive_estimator.py index de86c9e..ef0d29d 100644 --- a/stpy/point_processes/mbr_positive_estimator.py +++ b/stpy/point_processes/mbr_positive_estimator.py @@ -372,7 +372,7 @@ def information(self, S, dt, precomputed=None): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 32 embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) diff --git a/stpy/point_processes/poisson.py b/stpy/point_processes/poisson.py index 12703fc..8a1ca1f 100644 --- a/stpy/point_processes/poisson.py +++ b/stpy/point_processes/poisson.py @@ -44,13 +44,13 @@ def rate_volume(self, S, dt=1, rate=None): if self.d == 1: # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) integral, _ = integrate.quad( - lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(), + lambda x: rate(torch.tensor([x]).view(1, 1)).numpy(), float(S.bounds[0, 0]), float(S.bounds[0, 1]), ) elif self.d == 2: integrand = lambda x, y: rate( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() integral, _ = integrate.dblquad( integrand, @@ -190,7 +190,7 @@ def visualize(self, S, samples=2, n=10, dt=1.0, show=True): if __name__ == "__main__": d = 2 n = 100 - bounds = torch.Tensor([[-1, 1], [-1, 1]]).double() + bounds = torch.tensor([[-1, 1], [-1, 1]]).double() D = BorelSet(d, bounds) process = PoissonPointProcess(d=d, B=2) diff --git a/stpy/point_processes/poisson/link_fun_rate_estimator.py 
b/stpy/point_processes/poisson/link_fun_rate_estimator.py index 3ecfef2..02e46fb 100644 --- a/stpy/point_processes/poisson/link_fun_rate_estimator.py +++ b/stpy/point_processes/poisson/link_fun_rate_estimator.py @@ -62,7 +62,7 @@ def product_integral(self, S): @ self.packing.embed(x).view(1, -1) ).view(-1) integrand = lambda x, y: F( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() val = quadvec2( @@ -116,7 +116,7 @@ def product_integral(self, S): * self.packing.embed(x).view(-1)[j] ) integrand = lambda x, y: F_ij( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() val, status = integrate.dblquad( integrand, @@ -460,7 +460,7 @@ def mean_rate(self, S, n=128): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 64 embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) diff --git a/stpy/point_processes/poisson/loglinear_estimator.py b/stpy/point_processes/poisson/loglinear_estimator.py index 0e54199..d9ba3de 100644 --- a/stpy/point_processes/poisson/loglinear_estimator.py +++ b/stpy/point_processes/poisson/loglinear_estimator.py @@ -143,7 +143,7 @@ def mean_set(self, S, dt=1.0): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 128 k = KernelFunction(gamma=gamma) diff --git a/stpy/point_processes/poisson/mbr_positive_estimator.py b/stpy/point_processes/poisson/mbr_positive_estimator.py index 7d71a27..8993cb7 100644 --- a/stpy/point_processes/poisson/mbr_positive_estimator.py +++ b/stpy/point_processes/poisson/mbr_positive_estimator.py @@ -379,7 +379,7 @@ def information(self, S, dt, precomputed=None): hierarchical_structure = HierarchicalBorelSets(d=1, interval=(-1, 1), levels=levels) Sets = hierarchical_structure.get_all_sets() - D = BorelSet(1, bounds=torch.Tensor([[-1.0, 1.0]]).double()) + D = BorelSet(1, bounds=torch.tensor([[-1.0, 1.0]]).double()) m = 32 embedding = HermiteEmbedding(m=m, d=1, gamma=gamma) diff --git a/stpy/point_processes/poisson/poisson.py b/stpy/point_processes/poisson/poisson.py index 52c6576..74460dd 100644 --- a/stpy/point_processes/poisson/poisson.py +++ b/stpy/point_processes/poisson/poisson.py @@ -45,13 +45,13 @@ def rate_volume(self, S, dt=1, rate=None): if self.d == 1: # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) integral, _ = integrate.quad( - lambda x: rate(torch.Tensor([x]).view(1, 1)).numpy(), + lambda x: rate(torch.tensor([x]).view(1, 1)).numpy(), float(S.bounds[0, 0]), float(S.bounds[0, 1]), ) elif self.d == 2: integrand = lambda x, y: rate( - torch.Tensor([x, y]).view(1, 2).double() + torch.tensor([x, y]).view(1, 2).double() ).numpy() integral, _ = integrate.dblquad( integrand, diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index c065f4f..024ef00 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -267,7 +267,7 @@ def beta_theory(self): ## constraints eps = 10e-3 - res = Gamma_half @ self.rate.view(-1, 1) - torch.from_numpy(l).view(-1, 1) + res = Gamma_half @ self.rate.view(-1, 1) - 
torch.tensor(l).view(-1, 1) xi = res.clone() xi[res > eps] = 0.0 @@ -291,7 +291,13 @@ def beta_theory(self): return self.beta_value def get_constraints(self): - return self.packing.get_constraints() + old_elements = self.packing.get_constraints() + new_elements = [] + for element in old_elements: + if isinstance(element, np.ndarray): + element = torch.tensor(element) + new_elements.append(element) + return tuple(new_elements) def cov(self, inverse=False): return self.packing.cov(inverse=inverse) @@ -355,8 +361,8 @@ def sample_mirror_langevin(self, steps=500, verbose=False): l, Lambda, u = self.get_constraints() Gamma_half, invGamma_half = self.cov(inverse=True) - v = torch.from_numpy((u + l) / 2.0).view(-1, 1) - S = torch.diag(torch.from_numpy(u - l).view(-1) / 2.0).double() + v = torch.tensor((u + l) / 2.0).view(-1, 1) + S = torch.diag(torch.tensor(u - l).view(-1) / 2.0).double() phis = self.phis.clone() @ invGamma_half @@ -429,8 +435,8 @@ def sample_mirror_langevin(self, steps=500, verbose=False): u_new = u + 0.01 l_new = l - 0.01 - v2 = torch.from_numpy((u_new + l_new) / 2.0).view(-1, 1) - S2 = torch.diag(torch.from_numpy(u_new - l_new).view(-1) / 2.0).double() + v2 = torch.tensor((u_new + l_new) / 2.0).view(-1, 1) + S2 = torch.diag(torch.tensor(u_new - l_new).view(-1) / 2.0).double() # y.data = torch.inverse(S2) @ (y.data - v2) y.data = torch.atanh(y.data) @@ -444,7 +450,7 @@ def sample_mirror_langevin(self, steps=500, verbose=False): ) L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-8 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-8 ) ) eta = 0.05 / (L + 1) @@ -473,12 +479,12 @@ def sample_projected_langevin(self, steps=300, verbose=False, stepsize=None): Gamma_half = self.packing.cov() def prox(x): - z = x.numpy() + z = x.cpu().numpy() theta = cp.Variable((self.get_m(), 1)) objective = cp.Minimize(cp.sum_squares(z - theta)) constraints = [] l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half.numpy() + Lambda = Lambda @ Gamma_half.cpu().numpy() constraints.append(Lambda @ theta >= l.reshape(-1, 1)) prob = cp.Problem(objective, constraints) prob.solve( @@ -488,7 +494,7 @@ def prox(x): eps_abs=1e-3, eps_rel=1e-3, ) - return torch.from_numpy(theta.value) + return torch.tensor(theta.value) if self.feedback == "count-record" and self.dual == False: if self.observations is not None: @@ -544,7 +550,7 @@ def prox(x): W = self.construct_covariance_matrix_laplace(minimal=True) L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 ) ) @@ -572,17 +578,17 @@ def sample_proximal_langevin_prox(self, steps=300, verbose=False, stepsize=None) Gamma_half, invGamma_half = self.packing.cov(inverse=True) # invGamma = invGamma_half.T @ invGamma_half l, Lambda, u = self.get_constraints() - Lambda = Lambda @ Gamma_half.numpy() + Lambda = Lambda @ Gamma_half.cpu().numpy() def prox(x): res = solve_qp( np.eye(self.get_m()), - x.numpy().reshape(-1), - C=Gamma_half.numpy(), + x.cpu().numpy().reshape(-1), + C=Gamma_half.cpu().numpy(), b=np.array(l), factorized=True, ) - return torch.from_numpy(res[0]).view(-1, 1) + return torch.tensor(res[0]).view(-1, 1) # theta_n = cp.Variable((self.get_m(), 1)) # x = cp.Parameter((self.get_m(), 1)) @@ -590,14 +596,14 @@ def prox(x): # # constraints = [] # l, Lambda, u = self.get_constraints() - # Lambda = Lambda @ Gamma_half.numpy() + # Lambda = Lambda @ 
Gamma_half.cpu().numpy() # constraints.append(Lambda @ theta_n >= l.reshape(-1, 1)) # constraints.append(Lambda @ theta_n <= u.reshape(-1, 1)) # # prob = cp.Problem(objective, constraints) # def prox(x): - # return Gamma_half @ torch.from_numpy(scipy.optimize.nnls(invGamma.numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1) + # return Gamma_half @ torch.tensor(scipy.optimize.nnls(invGamma.cpu().numpy(), (invGamma_half@x).numpy().reshape(-1), maxiter = 1000)[0]).view(-1,1) samples = [] @@ -673,7 +679,11 @@ def prox(x): W = self.construct_covariance_matrix_laplace(theta=theta) L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-3 + W.cpu().numpy(), + k=1, + which="LM", + return_eigenvectors=False, + tol=1e-3, ) ) if stepsize is not None: @@ -682,9 +692,9 @@ def prox(x): eta = 0.5 / L # prox calculate - # x.value = theta.numpy() + # x.value = theta.cpu().numpy() # prob.solve(solver=cp.OSQP, warm_start=True, verbose=False, eps_abs=1e-3, eps_rel=1e-3) - # proximal_theta = torch.from_numpy(theta_n.value) + # proximal_theta = torch.tensor(theta_n.value) # update step # theta = 0.5 * theta - eta * nabla(theta) + 0.5 * proximal_theta + np.sqrt(2 * eta) * w @@ -710,8 +720,8 @@ def sample_proximal_langevin_simple_prox(self, steps=300, verbose=False): Gamma_half, invGamma_half = self.packing.cov(inverse=True) l, Lambda, u = self.get_constraints() prox_simple = lambda x: torch.minimum( - torch.maximum(x.view(-1), torch.from_numpy(l).view(-1)), - torch.from_numpy(u).view(-1), + torch.maximum(x.view(-1), torch.tensor(l).view(-1)), + torch.tensor(u).view(-1), ).view(-1, 1) def prox(x): @@ -803,7 +813,7 @@ def prox(x): W = self.construct_covariance_matrix_laplace() L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 ) ) @@ -883,7 +893,7 @@ def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=No Gamma_half = self.packing.cov() lz, Lambda, u = self.get_constraints() - Lambda = torch.from_numpy(Lambda) @ Gamma_half + Lambda = torch.tensor(Lambda) @ Gamma_half y = ( self.b + 0.05 @@ -905,7 +915,7 @@ def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=No W = self.construct_covariance_matrix_laplace() L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 ) ) @@ -946,7 +956,7 @@ def objective(s): # x0 = y.reshape(-1).clone().detach().numpy() # res = minimize(objective, x0, backend='torch', method='Newton-CG', precision='float64', tol=1e-5, hvp_type='vhp') - # y.data = torch.from_numpy(res.x) + # y.data = torch.tensor(res.x) x0 = y.reshape(-1).clone() res = minimize_torch(objective, x0, method="newton-cg", tol=1e-5) @@ -962,7 +972,7 @@ def sample_mla_prime(self, steps=100, verbose=False, stepsize=None): Gamma_half, invGamma_half = self.packing.cov(inverse=True) invGamma = invGamma_half.T @ invGamma_half l, Lambda, u = self.get_constraints() - Lambda = torch.from_numpy(Lambda) @ Gamma_half + Lambda = torch.tensor(Lambda) @ Gamma_half if self.data is not None: if self.feedback == "count-record" and self.dual == False: @@ -1004,7 +1014,7 @@ def sample_mla_prime(self, steps=100, verbose=False, stepsize=None): W = invGamma_half.T @ self.construct_covariance_matrix_laplace() @ invGamma_half L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, 
which="LM", return_eigenvectors=False, tol=1e-5 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 ) ) @@ -1029,7 +1039,7 @@ def sample_mla_prime(self, steps=100, verbose=False, stepsize=None): w0 = eta * nabla_val.data + 1.0 / y.data # initial point for the solve - # w0 = -1./( torch.from_numpy(x.value)) + # w0 = -1./( torch.tensor(x.value)) # simulate f = lambda w, n: n / torch.abs(w) @@ -1089,7 +1099,7 @@ def sample_hessian_positive_langevin_2( W = self.construct_covariance_matrix_laplace(minimal=True) L = float( scipy.sparse.linalg.eigsh( - W.numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 + W.cpu().numpy(), k=1, which="LM", return_eigenvectors=False, tol=1e-5 ) ) @@ -1382,7 +1392,7 @@ def objective(theta): }, ) - self.rate = invGamma_half.cpu() @ torch.from_numpy(res.x) + self.rate = invGamma_half @ torch.tensor(res.x) print(res.message) return self.rate @@ -1399,9 +1409,9 @@ def penalized_likelihood(self, threads=None): if self.dual == False: # using all points without anchor points - phis = self.phis.numpy() + phis = self.phis.cpu().numpy() if self.observations is not None: - observations = self.observations.numpy() + observations = self.observations.cpu().numpy() objective = cp.Minimize( -cp.sum(cp.log(observations @ theta)) + cp.sum(phis @ theta) @@ -1420,8 +1430,8 @@ def penalized_likelihood(self, threads=None): tau = self.total_bucketized_time[mask].clone().numpy() if self.observations is not None: - observations = self.anchor_points_emb.numpy() - weights = self.anchor_weights.numpy() + observations = self.anchor_points_emb.cpu().numpy() + weights = self.anchor_weights.cpu().numpy() mask = weights > 0.0 objective = cp.Minimize( -cp.sum( @@ -1440,15 +1450,15 @@ def penalized_likelihood(self, threads=None): constraints = [] - Lambda = Lambda @ Gamma_half.numpy() + Lambda = (Lambda @ Gamma_half).cpu().numpy() - constraints.append(Lambda @ theta >= l) - constraints.append(Lambda @ theta <= u) + constraints.append(Lambda @ theta >= l.cpu().numpy()) + constraints.append(Lambda @ theta <= u.cpu().numpy()) prob = cp.Problem(objective, constraints) if self.rate is not None: - theta.value = self.rate.numpy() + theta.value = self.rate.cpu().numpy() try: prob.solve( @@ -1464,7 +1474,7 @@ def penalized_likelihood(self, threads=None): }, ) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) return self.rate except: print("Optimization failed. Using the old value.") @@ -1473,8 +1483,8 @@ def penalized_likelihood(self, threads=None): def penalized_likelihood_integral(self, threads=4): - phis = self.phis.numpy() - counts = self.counts.numpy() + phis = self.phis.cpu().numpy() + counts = self.counts.cpu().numpy() theta = cp.Variable(self.get_m()) l, Lambda, u = self.get_constraints() @@ -1491,7 +1501,7 @@ def penalized_likelihood_integral(self, threads=4): constraints.append(Lambda @ theta <= u) # if self.rate is not None: - # theta.value = self.rate.numpy() + # theta.value = self.rate.cpu().numpy() try: prob = cp.Problem(objective, constraints) prob.solve( @@ -1506,7 +1516,7 @@ def penalized_likelihood_integral(self, threads=4): mosek.dparam.intpnt_co_tol_rel_gap: 1e-4, }, ) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) except: print("Optimization failed. 
Using the old value.") print(prob.status) @@ -1549,7 +1559,7 @@ def bucketization(self): else: for index, elementary in enumerate(basic_sets): if S.inside(elementary) == True: - data_basic[index].append(torch.Tensor([])) + data_basic[index].append(torch.tensor([])) counts[index] += 1 sensing_times[index].append(dt) @@ -1647,7 +1657,7 @@ def least_squares_weighted(self, threads=4): }, ) print(prob.status) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) return self.rate def least_sqaures_weighted_fast(self, threads=4): @@ -1688,7 +1698,7 @@ def objective(theta): eps = 1e-4 res = minimize( objective, - theta0.numpy(), + theta0.cpu().numpy(), backend="torch", method="L-BFGS-B", bounds=(l[0] + eps, u[0]), @@ -1703,7 +1713,7 @@ def objective(theta): "maxls": 20, }, ) - self.rate = invGamma_half @ torch.from_numpy(res.x) + self.rate = invGamma_half @ torch.tensor(res.x) return self.rate @@ -1718,12 +1728,12 @@ def least_squares_weighted_integral(self, threads=4): phis = self.phis.clone().numpy() # integrated actions if self.rate is None: - rate = torch.pinverse(torch.from_numpy(Gamma_half)) @ torch.from_numpy(u) + rate = torch.pinverse(torch.tensor(Gamma_half)) @ torch.tensor(u) else: rate = self.rate.clone() if len(self.variances_histogram) > 0: - variances = self.variances_histogram.numpy() + variances = self.variances_histogram.cpu().numpy() for i in range(variances.shape[0]): variances[i] = variances[i] * self.variance_correction(variances[i]) @@ -1760,7 +1770,7 @@ def least_squares_weighted_integral(self, threads=4): }, ) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) return self.rate @@ -1788,14 +1798,14 @@ def penalized_likelihood_bins(self, threads=4): try: prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) except: print("optimization failed.") return self.rate def penalized_likelihood_integral_bins(self, threads=4): - phis = self.phis.numpy() - counts = self.counts.numpy() + phis = self.phis.cpu().numpy() + counts = self.counts.cpu().numpy() theta = cp.Variable(self.get_m()) l, Lambda, u = self.get_constraints() @@ -1817,7 +1827,7 @@ def penalized_likelihood_integral_bins(self, threads=4): else: prob = cp.Problem(objective) prob.solve(solver=cp.CLARABEL, warm_start=False, verbose=True) - self.rate = torch.from_numpy(theta.value) + self.rate = torch.tensor(theta.value) except: print("Optimization failed. 
Using the old value.") @@ -1841,10 +1851,10 @@ def update_variances(self, value=False, force=False): new_var = [] for S, _, dt in self.data: new_var.append(float(self.ucb(S)) * dt) - self.variances_histogram = torch.Tensor(new_var.copy()).double() + self.variances_histogram = torch.tensor(new_var.copy()).double() else: last = self.data[-1] - new_var = torch.Tensor([self.ucb(last[0]) * last[2]]).double() + new_var = torch.tensor([self.ucb(last[0]) * last[2]]).double() if len(self.variances_histogram) > 0: self.variances_histogram = torch.cat( (self.variances_histogram, new_var) @@ -1898,10 +1908,22 @@ def mean_std_per_action(self, S, W, dt, beta): Lambda = Lambda @ Gamma_half ucb, _ = maximize_on_elliptical_slice( - phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u + phi.cpu().numpy(), + (W).numpy(), + self.rate.view(-1).cpu().numpy(), + beta, + l, + Lambda, + u, ) lcb, _ = maximize_on_elliptical_slice( - -phi.numpy(), (W).numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u + -phi.cpu().numpy(), + (W).numpy(), + self.rate.view(-1).cpu().numpy(), + beta, + l, + Lambda, + u, ) map = phi @ self.rate @@ -1936,8 +1958,8 @@ def mean_var_ratio_set(self, S, dt, beta=2.0): + 0.5 * self.s * torch.norm(self.rate) ** 2 ) - phis = self.phis.numpy() - counts = self.counts.numpy() + phis = self.phis.cpu().numpy() + counts = self.counts.cpu().numpy() theta = cp.Variable(self.get_m()) l, Lambda, u = self.get_constraints() Gamma_half = self.cov().numpy() @@ -2120,10 +2142,10 @@ def gap(self, S, actions, w, dt, beta=2.0): ucbs = [] for action in actions: phi_a = self.packing.integral(action) * dt - # ucb, _ = maximize_on_elliptical_slice(phi_a.numpy()-phi.numpy(), self.W.numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) + # ucb, _ = maximize_on_elliptical_slice(phi_a.cpu().numpy()-phi.cpu().numpy(), self.W.cpu().numpy(), self.rate.view(-1).numpy(), beta, l, Lambda, u) ucb, _ = maximize_on_elliptical_slice( - phi.numpy(), - self.W.numpy(), + phi.cpu().numpy(), + self.W.cpu().numpy(), self.rate.view(-1).numpy(), beta, l, @@ -2131,7 +2153,7 @@ def gap(self, S, actions, w, dt, beta=2.0): u, ) ucbs.append(float(ucb)) - gap = torch.max(torch.Tensor(ucbs)) + gap = torch.max(torch.tensor(ucbs)) else: if self.data is None: @@ -2271,7 +2293,7 @@ def map_lcb_ucb(self, S, n, beta=2.0): for i in range(N): x = Phi[i, :] ucbi, _ = maximize_on_elliptical_slice( - x.numpy(), + x.cpu().numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta), @@ -2280,7 +2302,7 @@ def map_lcb_ucb(self, S, n, beta=2.0): u, ) lcbi, _ = maximize_on_elliptical_slice( - -x.numpy(), + -x.cpu().numpy(), (W).numpy(), self.rate.view(-1).numpy(), np.sqrt(beta), @@ -2310,7 +2332,7 @@ def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False): ucb = torch.zeros(size=(N, 1)).double() lcb = torch.zeros(size=(N, 1)).double() - phis = self.phis.numpy() + phis = self.phis.cpu().numpy() if current: if self.observations is not None: @@ -2347,7 +2369,7 @@ def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False): Lambda = Lambda @ Gamma_half for i in range(N): - x = Phi[i, :].numpy() + x = Phi[i, :].cpu().numpy() theta = cp.Variable(self.get_m()) @@ -2360,7 +2382,7 @@ def map_lcb_ucb_likelihood_ratio(self, S, n, delta=0.1, current=False): if self.feedback == "count-record": if self.observations is not None: - observations = self.observations.numpy() + observations = self.observations.cpu().numpy() constraints.append( -cp.sum(cp.log(observations @ theta)) @@ -2416,7 +2438,7 @@ def 
conformal_score_func(self, theta, new, index): if n > 0: phis = self.varphis[index].repeat(n, 1) - res = torch.Tensor(self.bucketized_obs[index]).double() + res = torch.tensor(self.bucketized_obs[index]).double() err = torch.abs(res - (phis @ theta.view(-1, 1)).view(-1)) @@ -2445,9 +2467,9 @@ def conformal_confidence(self, delta=0.05, max_val=20, dt=1, step=1): lcb.append(l) return ( - torch.Tensor(map).double(), - torch.Tensor(ucb).double(), - torch.Tensor(lcb).double(), + torch.tensor(map).double(), + torch.tensor(ucb).double(), + torch.tensor(lcb).double(), ) def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1.0, step=1): @@ -2485,7 +2507,7 @@ def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1.0, step=1): if j > 0: obs = torch.zeros(size=(j, self.d)).double() for i in range(self.d): - obs[:, i] = torch.from_numpy( + obs[:, i] = torch.tensor( np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j) ) else: @@ -2522,7 +2544,7 @@ def conformal_confidence_set(self, S, delta=0.05, max_val=20, dt=1.0, step=1): if j > 0: obs = torch.zeros(size=(j, self.d)).double() for i in range(self.d): - obs[:, i] = torch.from_numpy( + obs[:, i] = torch.tensor( np.random.uniform(S.bounds[i, 0], S.bounds[i, 1], size=j) ) else: diff --git a/stpy/point_processes/positive_basis_estimator.py b/stpy/point_processes/positive_basis_estimator.py index d76a422..d404af6 100644 --- a/stpy/point_processes/positive_basis_estimator.py +++ b/stpy/point_processes/positive_basis_estimator.py @@ -36,7 +36,7 @@ def load_data(self, data, times=True): for sample in data: S, obs, dt = sample - count = torch.Tensor([0]) + count = torch.tensor([0]) if obs is not None: if times == True: @@ -46,7 +46,7 @@ def load_data(self, data, times=True): phi = self.packing.integral(S) * dt observations.append(emb) - count = torch.Tensor([emb.size()[0]]) + count = torch.tensor([emb.size()[0]]) phis.append(phi.view(1, -1)) if self.dual == True: @@ -95,7 +95,7 @@ def add_data_point(self, new_data, times=True): phi = self.packing.integral(S).view(1, -1) * dt - count = torch.Tensor([emb.size()[0]]) + count = torch.tensor([emb.size()[0]]) if self.observations is not None: self.observations = torch.cat((self.observations, emb), dim=0) @@ -111,7 +111,7 @@ def add_data_point(self, new_data, times=True): index = torch.argmin(dist_matrix[k, :]) self.anchor_weights[index] += 1.0 else: - count = torch.Tensor([0]) + count = torch.tensor([0]) phi = self.packing.integral(S).view(1, -1) * dt self.phis = torch.cat((self.phis, phi), dim=0) diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index 7f3dc1f..f666965 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -48,7 +48,7 @@ def load_data(self, data: List, times=True): x = [] for sample in data: S, obs, dt = sample - count = torch.Tensor([0]) + count = torch.tensor([0]) if obs is not None: x.append(obs) @@ -61,7 +61,7 @@ def load_data(self, data: List, times=True): phi = self.packing.integral(S) * dt observations.append(emb) - count = torch.Tensor([emb.size()[0]]) + count = torch.tensor([emb.size()[0]]) phis.append(phi.view(1, -1)) if self.dual == True: @@ -114,7 +114,7 @@ def add_data_point(self, new_data, times=True): phi = self.packing.integral(S).view(1, -1) * dt - count = torch.Tensor([emb.size()[0]]) + count = torch.tensor([emb.size()[0]]) if self.observations is not None: self.observations = torch.cat((self.observations, emb), dim=0) @@ -130,7 +130,7 @@ def add_data_point(self, 
new_data, times=True): index = torch.argmin(dist_matrix[k, :]) self.anchor_weights[index] += 1.0 else: - count = torch.Tensor([0]) + count = torch.tensor([0]) phi = self.packing.integral(S).view(1, -1) * dt self.phis = torch.cat((self.phis, phi), dim=0) diff --git a/stpy/point_processes/seasonal_point_process.py b/stpy/point_processes/seasonal_point_process.py index 50cb824..c75cf55 100644 --- a/stpy/point_processes/seasonal_point_process.py +++ b/stpy/point_processes/seasonal_point_process.py @@ -31,13 +31,13 @@ def rate_volume(self, S, t, dt=1, rate=None): if self.d == 1: # integrate = S.volume()* self.rate(torch.from_numpy(S.bounds[0,1]).view(1)) integral, _ = integrate.quad( - lambda x: rate(torch.Tensor([x]).view(1, 1), t).numpy(), + lambda x: rate(torch.tensor([x]).view(1, 1), t).numpy(), float(S.bounds[0, 0]), float(S.bounds[0, 1]), ) elif self.d == 2: integrand = lambda x, y: rate( - torch.Tensor([x, y], t).view(1, 2).double() + torch.tensor([x, y], t).view(1, 2).double() ).numpy() integral, _ = integrate.dblquad( integrand, diff --git a/stpy/probability/likelihood.py b/stpy/probability/likelihood.py index 586f9ba..939cd12 100644 --- a/stpy/probability/likelihood.py +++ b/stpy/probability/likelihood.py @@ -81,7 +81,7 @@ def lr_confidence_set_cvxpy(self, theta, beta, params): :param params: :return: """ - evidence = torch.Tensor(params["evidence"]).bool() + evidence = torch.tensor(params["evidence"]).bool() self.set_fn = lambda theta: [ self.get_objective_cvxpy(mask=evidence)(theta) <= beta ] diff --git a/stpy/test_functions/benchmarks.py b/stpy/test_functions/benchmarks.py index 3b4907f..8432ca4 100755 --- a/stpy/test_functions/benchmarks.py +++ b/stpy/test_functions/benchmarks.py @@ -173,7 +173,7 @@ def __init__(self, **kwargs): print("Quadratic Problem: Additive.") def eval_noiseless(self, X): - D = torch.diag(torch.Tensor([1.0, 2.0]).double()) + D = torch.diag(torch.tensor([1.0, 2.0]).double()) super().eval_noiseless(X) (n, d) = X.size() X = X @ self.R @@ -201,7 +201,7 @@ def __init__(self, **kwargs): print("Quadratic Problem: Additive.") def eval_noiseless(self, X): - D = torch.diag(torch.Tensor([1.0, 2.0]).double()) + D = torch.diag(torch.tensor([1.0, 2.0]).double()) super().eval_noiseless(X) (n, d) = X.size() X = X @ self.R @@ -370,7 +370,7 @@ def optimize(self, xtest, sigma, restarts=5, n=512): # self.gamma = torch.min(kernel.gamma) # self.gamma = torch.zeros(1,1,dtype = torch.DoubleTensor) # self.gamma[0,0] =0.35 - self.gamma = torch.Tensor([0.35]).double() + self.gamma = torch.tensor([0.35]).double() return self.gamma diff --git a/tests/SRI_test.py b/tests/SRI_test.py index a1bf5d0..e6b97ae 100755 --- a/tests/SRI_test.py +++ b/tests/SRI_test.py @@ -4,7 +4,7 @@ def get_angle(R): - v = torch.Tensor([1.0, 1.0]).double() + v = torch.tensor([1.0, 1.0]).double() a1 = np.arccos((torch.dot(v, R @ v) / torch.dot(v, v)).numpy()) a2 = np.arccos(-(torch.dot(v, R @ v) / torch.dot(v, v)).numpy()) return np.min([a1, a2]) @@ -55,7 +55,7 @@ def get_angle(R): embedding = HermiteEmbedding(gamma=gamma, m=m, d=d, diameter=1, approx="hermite") Map = lambda x: embedding.embed(x) - x0 = torch.Tensor([0.0, 0.0]).double().view(-1, d) + x0 = torch.tensor([0.0, 0.0]).double().view(-1, d) # Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=False, s = 10e-8) Bandit = OPPR_TS_GP(x0, F, GP, Map, finite_dim=True, s=s, GPMap=True) diff --git a/tests/continous_processes/test_estimators/domain_non_stationarity.py b/tests/continous_processes/test_estimators/domain_non_stationarity.py index 1e09e57..b772dfb 
100644 --- a/tests/continous_processes/test_estimators/domain_non_stationarity.py +++ b/tests/continous_processes/test_estimators/domain_non_stationarity.py @@ -53,8 +53,8 @@ def zeroing(X): # X = torch.rand(size = (N,d)).double()*0.25+0.5 # y = F(X) # -# Xpoint = torch.Tensor([[0.],[0.5]]).double() -# ypoint = torch.Tensor([[0.],[0.]]).double() +# Xpoint = torch.tensor([[0.],[0.5]]).double() +# ypoint = torch.tensor([[0.],[0.]]).double() # # X = torch.vstack([X,Xpoint]) # y = torch.vstack([y,ypoint]) diff --git a/tests/continous_processes/test_estimators/qff_nonstationary.py b/tests/continous_processes/test_estimators/qff_nonstationary.py index 576f73c..97f0fb3 100644 --- a/tests/continous_processes/test_estimators/qff_nonstationary.py +++ b/tests/continous_processes/test_estimators/qff_nonstationary.py @@ -75,8 +75,8 @@ def zero_out_interval(x, interval): X = torch.rand(size=(10, d)).double() * 0.25 + 0.1 y = F(X) -# Xpoint = torch.Tensor([[0.],[0.5]]).double() -# ypoint = torch.Tensor([[0.],[0.]]).double() +# Xpoint = torch.tensor([[0.],[0.5]]).double() +# ypoint = torch.tensor([[0.],[0.]]).double() # X = torch.vstack([X,Xpoint]) # y = torch.vstack([y,ypoint]) diff --git a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py index 640468b..cdcbe48 100644 --- a/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py +++ b/tests/continous_processes/test_estimators/test_regularized_dictionary_l2.py @@ -82,7 +82,7 @@ def decay_function(emb): N = 1 v = torch.randn(size=(m, 1)).double() F = lambda X: embedding.embed(X) @ v -X = torch.Tensor([[0.5]]).double() +X = torch.tensor([[0.5]]).double() y = F(X) xtest = interval_torch(n=n, d=1) diff --git a/tests/fourier-features-multidimensional.py b/tests/fourier-features-multidimensional.py index a6bc381..c46254b 100644 --- a/tests/fourier-features-multidimensional.py +++ b/tests/fourier-features-multidimensional.py @@ -21,7 +21,7 @@ def cost_function(): ytest = F(xtest) GP = GaussianProcessFF( - d=2, groups=[[0, 1]], m=torch.Tensor([m, 64]), gamma=torch.Tensor([0.2]) + d=2, groups=[[0, 1]], m=torch.tensor([m, 64]), gamma=torch.tensor([0.2]) ) GP.fit_gp(xtest, ytest) diff --git a/tests/hessian-estimation-test.py b/tests/hessian-estimation-test.py index bcae5ac..6c287c6 100755 --- a/tests/hessian-estimation-test.py +++ b/tests/hessian-estimation-test.py @@ -13,8 +13,8 @@ thetae = np.radians(35.0) ce, se = np.cos(thetae), np.sin(thetae) R = torch.from_numpy(np.array(((ce, -se), (se, ce)))) -D = torch.diag(torch.Tensor([0.8, 1.1]).double()) -# D = torch.diag(torch.Tensor([1, 1]).double()) +D = torch.diag(torch.tensor([0.8, 1.1]).double()) +# D = torch.diag(torch.tensor([1, 1]).double()) W = R.T @ D @ R print(W) @@ -74,10 +74,10 @@ def plot_contour(xtest, ytest, lim=None): Map = lambda x: embedding.embed(x) # Starting points -x0_1 = torch.Tensor([0.1, 0.1]).double().view(-1, d) +x0_1 = torch.tensor([0.1, 0.1]).double().view(-1, d) -# x0_1 = torch.Tensor([-0.1, 0.]).double().view(-1, d) -x0_2 = torch.Tensor([0.1, 0.1]).double().view(-1, d) +# x0_1 = torch.tensor([-0.1, 0.]).double().view(-1, d) +x0_2 = torch.tensor([0.1, 0.1]).double().view(-1, d) print("Embeding size:", Map(x0_1).size()) From 0e035a684ef918850e1eab59712a0ab27f4f7b58 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:07:39 +0100 Subject: [PATCH 22/39] improve posterior sampling print statements --- stpy/helpers/posterior_sampling.py | 36 
+++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/stpy/helpers/posterior_sampling.py b/stpy/helpers/posterior_sampling.py index 632f0e4..46d3472 100644 --- a/stpy/helpers/posterior_sampling.py +++ b/stpy/helpers/posterior_sampling.py @@ -1,4 +1,7 @@ +import sys import numpy as np +import scipy +from tqdm import tqdm # Python implementation of "Exact Hamiltonian Monte Carlo for Truncated Multivariate Gaussian" @@ -140,11 +143,9 @@ def sampleNext(self): if check >= 0: # verify that we don't violate the constraints # due to a numerical instability - if self.verbose: - print("total number of velocity samples : %d" % count_sample_vel) self.lastSample = bb - return bb + return bb, count_sample_vel def tmg(n, mu, M, initial, f=None, g=None, burn_in=30, verbose=False): @@ -186,8 +187,8 @@ def tmg(n, mu, M, initial, f=None, g=None, burn_in=30, verbose=False): if f is not None: if f.shape[0] != len(g) or f.shape[1] != dim: raise ValueError( - "Inconsistent linear constraints. f must \ - be an d-by-m matrix and g an d-dimensional vector." + "Inconsistent linear constraints. f must " + " be an d-by-m matrix and g an d-dimensional vector." ) # g may contains infinity, extract valid constraints valid = np.logical_and(g < np.inf, g > -np.inf) @@ -207,14 +208,23 @@ def tmg(n, mu, M, initial, f=None, g=None, burn_in=30, verbose=False): hmc = HmcSampler(dim, init_trans, f, g, verbose=verbose) samples = np.zeros((n, dim)) - for i in range(burn_in): - if verbose: - print("=" * 30 + " (burn in) sample {} ".format(i) + "=" * 30) - hmc.sampleNext() - for i in range(n): - if verbose: - print("=" * 30 + " sample {} ".format(i) + "=" * 30) - samples[i] = hmc.sampleNext() + for num_steps, desc in [(burn_in, "Burn-In"), (n, "sampling")]: + progress_bar = tqdm(range(num_steps), desc=desc, position=0) + numbers_bar = tqdm(total=1, bar_format="{desc}", position=1) + count_sample_vels = [] + + for i in progress_bar: + s, count_sample_vel = hmc.sampleNext() + if desc == "sampling": + samples[i] = s + + if hmc.verbose: + count_sample_vels.append(count_sample_vel) + numbers_bar.set_description( + "\rtotal number of velocity samples:" + f" {', '.join(map(str, count_sample_vels))}" + ) + numbers_bar.refresh() # transform back return samples @ R.T + mu From f83af69aa9aae865be8a39c954f879a51a787207 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:08:47 +0100 Subject: [PATCH 23/39] Fix the squared exp integral by using gamma correctly (usually sigma) --- .../squared_exponential_kernel.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/stpy/kernel_functions/squared_exponential_kernel.py b/stpy/kernel_functions/squared_exponential_kernel.py index f0d8db0..7b6fe26 100644 --- a/stpy/kernel_functions/squared_exponential_kernel.py +++ b/stpy/kernel_functions/squared_exponential_kernel.py @@ -41,7 +41,7 @@ def squared_exponential_kernel_diag(a, b, **kwargs): def squared_exponential_integral(a_x, a_y, b_x, b_y, **kwargs): """ - Returns a function that computes g(x) for multiple 2D points x given lower and upper bounds. + Returns $g(x) = \int_{a_1, b_1}^{a_2, b_2} \kappa \cdot \exp(-\gamma \| x - s \|^2 ds$ Parameters: - a_x: torch.Tensor, lower bounds in x-dimension (shape: [N]) @@ -51,12 +51,14 @@ def squared_exponential_integral(a_x, a_y, b_x, b_y, **kwargs): - kwargs: should give attributes gamma (float) and kappa (float) Returns: - - A function `g(x)` that computes g(x) for input x (torch.Tensor of shape [M, 2]). 
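# A minimal sketch, kept as Python comments since it sits inside the Python diff and
# is not part of the patch: the closed form in squared_exponential_integral follows
# from the 1-D identity, reading `gamma` as the length-scale (the "usually sigma" of
# the commit message):
#   $\int_a^b \exp(-(t-x)^2/(2\gamma^2))\,dt
#       = \gamma\sqrt{\pi/2}\,[\mathrm{erf}((b-x)/(\gamma\sqrt{2})) - \mathrm{erf}((a-x)/(\gamma\sqrt{2}))]$
# The box integral is the product of the x- and y-factors, and multiplying by $\kappa$
# gives the $\pi\kappa\gamma^2/2$ prefactor used in the new code. The previous scaling
# of the erf arguments by $\sqrt{\gamma}$ with a $\pi\kappa/(4\gamma)$ prefactor is the
# analogous identity when $\gamma$ is a precision rather than a length-scale, which is
# what this commit corrects.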
+ - A function such that squared_exponetial_integral(a_x, a_y, b_x, b_y)(x)[i][j] + is equal to $g(x_j)$ where $g$ is created from a_x[i], a_y[i], b_x[i], b_y[i] """ p = KernelParams(kwargs) p.assert_existence(["gamma", "kappa"]) gamma = p.gamma kappa = p.kappa + sqrt_2 = torch.sqrt(torch.tensor(2.0)) def g(x): """ @@ -75,16 +77,16 @@ def g(x): b_y_broadcast = b_y.unsqueeze(1) # Shape [N, 1] # Compute the error function terms - erf_x1_a = torch.erf((a_x_broadcast - x1) * torch.sqrt(torch.tensor(gamma))) - erf_x1_b = torch.erf((b_x_broadcast - x1) * torch.sqrt(torch.tensor(gamma))) - erf_x2_a = torch.erf((a_y_broadcast - x2) * torch.sqrt(torch.tensor(gamma))) - erf_x2_b = torch.erf((b_y_broadcast - x2) * torch.sqrt(torch.tensor(gamma))) + erf_x1_a = torch.erf((a_x_broadcast - x1) / (gamma * sqrt_2)) + erf_x1_b = torch.erf((b_x_broadcast - x1) / (gamma * sqrt_2)) + erf_x2_a = torch.erf((a_y_broadcast - x2) / (gamma * sqrt_2)) + erf_x2_b = torch.erf((b_y_broadcast - x2) / (gamma * sqrt_2)) # Compute the product of error function differences integral_values = (erf_x1_a - erf_x1_b) * (erf_x2_a - erf_x2_b) # Scale by constants - result = (torch.pi * kappa / (4 * gamma)) * integral_values + result = (torch.pi * kappa * (gamma**2) / 2.0) * integral_values return result From 0ca798d28f6f4b57bc9e35f7343590c7979f10f9 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:10:59 +0100 Subject: [PATCH 24/39] fix integral scaling in ExpGaussProcessRateEstimator --- .../link_fun_rate_estimator.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/stpy/point_processes/link_fun_rate_estimator.py b/stpy/point_processes/link_fun_rate_estimator.py index a188513..ee51e4e 100644 --- a/stpy/point_processes/link_fun_rate_estimator.py +++ b/stpy/point_processes/link_fun_rate_estimator.py @@ -218,6 +218,7 @@ def load_data(self, data): self.sumLambda += self.product_integral(S) * dt else: self.S = data[0][0] + self.dt = data[0][2] assert isinstance(self.S, BorelSet) def add_data_point(self, new_data): @@ -409,21 +410,23 @@ def mean_rate(self, S, n=128): class ExpGaussProcessRateEstimator(PermanentalProcessRateEstimator): def penalized_likelihood(self, threads=4): + # ONLY WORKS WITH ONE DATASET given by load_data! 
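# A minimal, self-contained sketch of the objective assembled below (an editorial
# illustration, not part of the patch; the array names and shapes are assumptions):
# observed-point embeddings Phi_obs (N x m), Gauss-Legendre weights w (J,) and node
# embeddings Phi_nodes (J x m) on the set S, exposure dt and ridge weight s. The
# fitted rate is then lambda(x) = exp(theta . phi(x)), as in mean_rate further down.
import numpy as np

def penalized_loglinear_loss(theta, Phi_obs, w, Phi_nodes, dt=1.0, s=1e-3):
    data_term = -np.sum(Phi_obs @ theta)            # -sum_i theta . phi(x_i) over observed points
    # quadrature approximation of  dt * int_S exp(theta . phi(u)) du
    integral_term = dt * np.sum(w * np.exp(Phi_nodes @ theta))
    penalty = 0.5 * s * np.sum(theta ** 2)          # ridge penalty, as in the diff
    return data_term + integral_term + penalty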
# Get node function values and weights for Gauss-Legendre quadrature weights, nodes = self.S.return_legendre_discretization(n=50) - weights = np.array(weights) - vals = np.array(self.packing.embed_internal(nodes)) + nodes = nodes.to(torch.get_default_device()) + weights = weights.cpu().numpy() + vals = self.packing.embed(nodes).cpu().numpy() if self.observations is not None: - observations = self.observations.numpy() + observations = self.observations.cpu().numpy() loss = lambda theta: float( - np.sum(observations @ theta) - + np.sum(weights * np.exp(-theta @ vals)) - + self.s * np.sum(theta**2) + -np.sum(observations @ theta) + + self.dt * np.sum(weights * np.exp(theta @ vals.T)) + + self.s * 0.5 * np.sum(theta**2) ) else: loss = lambda theta: float( - np.sum(weights * np.exp(-theta @ nodes.T)) + self.s * np.sum(theta**2) + np.sum(weights * np.exp(theta @ nodes.T)) + self.s * np.sum(theta**2) ) theta = np.zeros(self.get_m()) @@ -442,13 +445,24 @@ def penalized_likelihood(self, threads=4): "gtol": 1e-8, }, ) - self.rate = torch.from_numpy(res.x) + self.rate = torch.tensor(res.x) return self.rate def mean_rate(self, S, n=128): xtest = S.return_discretization(n) - return torch.exp(-self.packing.embed(xtest) @ self.rate) + return torch.exp(self.packing.embed(xtest) @ self.rate) + + def rate_value(self, x, dt=1): + phi = self.packing.embed(x) * dt + + if self.rate is not None: + map = torch.exp(phi @ self.rate.view(-1, 1)) + else: + print("Rate function not fitted!") + map = 0 * phi[:, 0].view(-1, 1) + self.b + + return map if __name__ == "__main__": From 1fbe5277758a8b2434ef6e81173d8f11939262fc Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:11:23 +0100 Subject: [PATCH 25/39] enable .fit on PoissonRateEstimator --- stpy/point_processes/poisson_rate_estimator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index 024ef00..f885904 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -302,6 +302,9 @@ def get_constraints(self): def cov(self, inverse=False): return self.packing.cov(inverse=inverse) + def fit(self): + self.fit_gp() + def fit_gp( self, threads=4, @@ -1309,6 +1312,8 @@ def penalized_likelihood_fast( self, device: torch.device = torch.get_default_device() ): l, Lambda, u = self.get_constraints() + # assert torch.allclose(Lambda, torch.eye(self.m**self.d)) + Gamma_half, invGamma_half = self.cov(inverse=True) invGamma_half = invGamma_half.to(device) From 42d7b7e0b82219618bf9df0b8aeba3b19c969bc7 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 1 Dec 2024 16:11:42 +0100 Subject: [PATCH 26/39] Only run bucketization if self.dual --- stpy/point_processes/rate_estimator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stpy/point_processes/rate_estimator.py b/stpy/point_processes/rate_estimator.py index f666965..ed6dbcc 100644 --- a/stpy/point_processes/rate_estimator.py +++ b/stpy/point_processes/rate_estimator.py @@ -91,7 +91,7 @@ def load_data(self, data: List, times=True): else: self.observations = None - if self.feedback == "count-record": + if self.feedback == "count-record" and self.dual: self.bucketization() def add_data_point(self, new_data, times=True): @@ -209,3 +209,6 @@ def get_observations(self): return None else: return None + + def fit(self): + raise NotImplementedError() From b335dc8d09eecf1fb0646275cbea701ad962f616 Mon Sep 17 00:00:00 2001 From: peibensteine 
Date: Sun, 1 Dec 2024 16:12:25 +0100 Subject: [PATCH 27/39] Naive integral implementation for PPP for more efficient reference --- stpy/point_processes/poisson/poisson.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/stpy/point_processes/poisson/poisson.py b/stpy/point_processes/poisson/poisson.py index 74460dd..524f0c0 100644 --- a/stpy/point_processes/poisson/poisson.py +++ b/stpy/point_processes/poisson/poisson.py @@ -11,7 +11,9 @@ class PoissonPointProcess: """ - def __init__(self, d=1.0, B=1.0, b=0.2, rate=None, rate_volume=None): + def __init__( + self, d=1.0, B=1.0, b=0.2, rate=None, rate_volume=None, naive_integral=False + ): self.B = B self.d = d self.b = b @@ -22,6 +24,7 @@ def __init__(self, d=1.0, B=1.0, b=0.2, rate=None, rate_volume=None): self.rate_volume_f = rate_volume self.exact = True + self.naive_integral = naive_integral def rate_default(self, x, dt=1.0): return ( @@ -65,18 +68,24 @@ def rate_volume(self, S, dt=1, rate=None): else: return self.rate_volume_f(S) * dt - def sample_discretized(self, S, dt, n=100): - lam = np.maximum(float(self.rate_volume(S, dt)), 0) - count = np.random.poisson(lam=lam) + def sample_discretized(self, S: BorelSet, dt, n=100): + x = S.return_discretization(n).to(device=torch.get_default_device()) + r = self.rate(x) * dt + if self.naive_integral: + total_area = 1.0 + for bound in S.bounds: + total_area *= bound[1] - bound[0] + lam = r.sum() * (total_area / len(x)) + else: + lam = np.maximum(float(self.rate_volume(S, dt)), 0) + count = np.random.poisson(lam=lam.cpu().numpy()) if count > 0: - x = S.return_discretization(n) - r = self.rate(x) * dt r = torch.maximum(r, r * 0) sample = torch.from_numpy( np.random.choice( np.arange(0, x.size()[0], 1), size=count, - p=(r / torch.sum(r)).numpy().reshape(-1), + p=(r / torch.sum(r)).cpu().numpy().reshape(-1), ) ) return x[sample, :] @@ -187,7 +196,7 @@ def visualize(self, S, samples=2, n=10, dt=1.0, show=True): if __name__ == "__main__": d = 2 n = 100 - bounds = torch.Tensor([[-1, 1], [-1, 1]]).double() + bounds = torch.tensor([[-1, 1], [-1, 1]]).double() D = BorelSet(d, bounds) process = PoissonPointProcess(d=d, B=2) From b662e415e1ad0da45b7c3bf13a24f92d015b80d4 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 8 Dec 2024 22:24:04 +0100 Subject: [PATCH 28/39] Implement roi for optimal basis and fix interpolation outside convex set --- stpy/continuous_processes/nystrom_fea.py | 77 +++++++++++++++---- stpy/embeddings/optimal_positive_basis.py | 31 ++++++-- .../point_processes/poisson_rate_estimator.py | 6 +- 3 files changed, 90 insertions(+), 24 deletions(-) diff --git a/stpy/continuous_processes/nystrom_fea.py b/stpy/continuous_processes/nystrom_fea.py index dec4f0a..9175c5d 100755 --- a/stpy/continuous_processes/nystrom_fea.py +++ b/stpy/continuous_processes/nystrom_fea.py @@ -1,10 +1,11 @@ import matplotlib.pyplot as plt -from scipy.interpolate import LinearNDInterpolator +from scipy.interpolate import LinearNDInterpolator, NearestNDInterpolator from scipy.interpolate import interp1d from stpy.continuous_processes.gauss_procc import GaussianProcess from stpy.embeddings.embedding import * from stpy.helpers.helper import * +from stpy.helpers.posterior_sampling import tmg from stpy.kernels import KernelFunction @@ -13,7 +14,13 @@ class NystromFeatures(Embedding): Nystrom Features for Gaussian Kernel """ - def __init__(self, kernel_object, m=100, approx="uniform", s=1.0, samples=100): + def __init__( + self, kernel_object, m=100, approx="uniform", s=1.0, 
samples=100, fast=True + ): + """ + fast, optional + If it is true, the samples from the truncated Gaussian are approximated by squared samples of a Gaussian, by default True + """ self.fit = False self.m = m @@ -26,6 +33,7 @@ def __init__(self, kernel_object, m=100, approx="uniform", s=1.0, samples=100): self.kernel = kernel_object.kernel self.approx = approx self.s = s + self.fast = fast def description(self): """ @@ -145,9 +153,25 @@ def fit_gp(self, x, y, eps=1e-14): elif self.approx == "positive_svd": from sklearn.decomposition import NMF - GP = GaussianProcess(kernel=self.kernel_object) - ysample = GP.sample(x, size=self.samples) ** 2 - X = ysample + if self.fast: + GP = GaussianProcess(kernel=self.kernel_object) + ysample = GP.sample(x, size=self.samples) ** 2 + X = ysample + else: + burn_in = 30 + ysample = tmg( + self.samples, + np.zeros(len(x)), + self.kernel_object.kernel(x, x).cpu().numpy() + + 1e-7 * np.eye(len(x)), + torch.ones(len(x)).cpu().numpy(), + np.eye(len(x)), + np.zeros(len(x)), + burn_in, + True, + ) + X = torch.tensor(ysample.T) + model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12) W = torch.tensor(model.fit_transform(X.cpu())) H = torch.tensor(model.components_) @@ -169,19 +193,42 @@ def fit_gp(self, x, y, eps=1e-14): ) elif x.size()[1] == 2: + fs = [] for j in range(self.ms): + # each column of W is one \phi_i that is normalized to \|phi_i\|_2=1 W_j = (W.T @ torch.diag(l))[j, :].cpu().numpy() - fs.append(LinearNDInterpolator(x.cpu().numpy(), W_j)) - self.embed = lambda q: torch.cat( - [ - torch.tensor( - fs[j](q[:, 0].cpu().numpy(), q[:, 1].cpu().numpy()) - ).view(-1, 1) - for j in range(self.ms) - ], - dim=1, - ) + fs.append( + ( + LinearNDInterpolator(x.cpu().numpy(), W_j), + NearestNDInterpolator(x.cpu().numpy(), W_j), + ) + ) + + def embed(q): + out_list = [] + # Interpolate for points inside convex set else Nearest Neighbor + for j in range(self.ms): + cur = fs[j][0](q[:, 0].cpu().numpy(), q[:, 1].cpu().numpy()) + mask = np.isnan(cur) + cur[mask] = fs[j][1]( + q[:, 0].cpu().numpy()[mask], q[:, 1].cpu().numpy()[mask] + ) + out_list.append(torch.tensor(cur).view(-1, 1)) + return torch.cat(out_list, dim=1) + + self.embed = embed + + # self.embed = lambda q: torch.cat( + # [ + # torch.tensor( + # fs[j](q[:, 0].cpu().numpy(), q[:, 1].cpu().numpy()) + # ).view(-1, 1) + # for j in range(self.ms) + # ], + # dim=1, + # ) + # elif x.size()[1] == 2: # fs = [] # for j in range(self.ms): diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index 096c4e5..782ab42 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -1,4 +1,5 @@ import pickle +from typing import Optional import numpy as np import scipy @@ -13,18 +14,32 @@ class OptimalPositiveBasis(PositiveEmbedding): def __init__( - self, *args, samples=300, discretization_size=30, saved=False, **kwargs + self, + *args, + samples=300, + discretization_size=30, + saved=False, + roi: torch.Tensor | BorelSet | None = None, + **kwargs ): + # roi is the set of points that the basis is optimal for if it is a tensor + # else it is the region that the basis if optimal for that will be discretized + # by discretization_size. If it is not given the entire domain will be used. 
super().__init__(*args, **kwargs) self.samples = np.maximum(samples, self.m) - B = BorelSet( - self.d, - torch.tensor( - [[self.interval[0], self.interval[1]] for _ in range(self.d)] - ).double(), - ) - self.discretized_domain = B.return_discretization(discretization_size) + if roi is None: + B = BorelSet( + self.d, + torch.tensor( + [[self.interval[0], self.interval[1]] for _ in range(self.d)] + ).double(), + ) + self.discretized_domain = B.return_discretization(discretization_size) + elif isinstance(roi, BorelSet): + self.discretized_domain = roi.return_discretization(discretization_size) + else: + self.discretized_domain = roi y = self.discretized_domain[:, 0].view(-1, 1) * 0 diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index f885904..ca20358 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -4,7 +4,7 @@ import mosek import numpy as np import scipy -from stpy.borel_set import HierarchicalBorelSets +from stpy.borel_set import BorelSet, HierarchicalBorelSets from stpy.embeddings.embedding import Embedding from stpy.kernels import KernelFunction import torch @@ -59,6 +59,8 @@ def __init__( no_anchor_points=1024, U=1.0, optimization_library="torch", + roi: torch.Tensor | BorelSet | None = None, + roi_discretization: int = 30, ): self.d = d """ Dimension of the data """ @@ -160,6 +162,8 @@ def __init__( offset=offset, s=np.sqrt(jitter), samples=samples_nystrom, + roi=roi, + discretization_size=roi_discretization, ) elif basis == "custom": assert embedding is not None From fce9b5f12601262ed254b72524c57e024bfa6462 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Thu, 23 Jan 2025 17:26:43 +0100 Subject: [PATCH 29/39] fix optimal basis by norming over the right dimension --- stpy/continuous_processes/nystrom_fea.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stpy/continuous_processes/nystrom_fea.py b/stpy/continuous_processes/nystrom_fea.py index 9175c5d..fa30fff 100755 --- a/stpy/continuous_processes/nystrom_fea.py +++ b/stpy/continuous_processes/nystrom_fea.py @@ -15,7 +15,7 @@ class NystromFeatures(Embedding): """ def __init__( - self, kernel_object, m=100, approx="uniform", s=1.0, samples=100, fast=True + self, kernel_object, m=100, approx="uniform", s=1.0, samples=100, fast=False ): """ fast, optional @@ -175,8 +175,7 @@ def fit_gp(self, x, y, eps=1e-14): model = NMF(n_components=self.ms, max_iter=8000, tol=1e-12) W = torch.tensor(model.fit_transform(X.cpu())) H = torch.tensor(model.components_) - l = torch.norm(W, dim=1) - l = 1.0 / l + W_norm = W / torch.linalg.norm(W, dim=0) if x.size()[1] == 1: fs = [] @@ -184,7 +183,7 @@ def fit_gp(self, x, y, eps=1e-14): fs.append( interp1d( x.view(-1).cpu().numpy(), - (W.T @ torch.diag(l))[j, :].cpu().numpy(), + W_norm[:, j].cpu().numpy(), ) ) self.embed = lambda q: torch.cat( @@ -197,7 +196,7 @@ def fit_gp(self, x, y, eps=1e-14): fs = [] for j in range(self.ms): # each column of W is one \phi_i that is normalized to \|phi_i\|_2=1 - W_j = (W.T @ torch.diag(l))[j, :].cpu().numpy() + W_j = W_norm[:, j].cpu().numpy() fs.append( ( LinearNDInterpolator(x.cpu().numpy(), W_j), From b8b99d6a614647112cec849a5431f8a7a476dbd1 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 13:47:54 +0100 Subject: [PATCH 30/39] add truncated gp class --- stpy/continuous_processes/truncated_gp.py | 84 +++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 
stpy/continuous_processes/truncated_gp.py diff --git a/stpy/continuous_processes/truncated_gp.py b/stpy/continuous_processes/truncated_gp.py new file mode 100644 index 0000000..91d05a6 --- /dev/null +++ b/stpy/continuous_processes/truncated_gp.py @@ -0,0 +1,84 @@ +import numpy as np +from stpy.continuous_processes.gauss_procc import GaussianProcess +from stpy.helpers.posterior_sampling import tmg +import torch + + +class TruncatedGP: + """ + A truncated Gaussian Process that can serve as a ground truth model + for the PPP estimators. Sampling is very slow at the moment + """ + + def __init__(self, kernel, d): + self.gp = GaussianProcess(kernel=kernel, d=d) + self.x_acc = None + self.y_acc = None + + def __call__(self, x: torch.tensor, dt: float = 1.0, burn_in=30): + N = len(x) + # Initialize sample array + sample = torch.zeros(N) + + if self.x_acc is None: + x_new = x + else: + # Find indices of x that are already in self.x_acc + matching = torch.all( + x.unsqueeze(1) == self.x_acc.unsqueeze(0), dim=2 + ) # (N, M) + matching_indices = torch.nonzero(matching, as_tuple=False) # (K, 2) + idx_cached_in_x = matching_indices[:, 0] # Indices in x + idx_cached_in_acc = matching_indices[:, 1] # Indices in self.x_acc + + # Determine which indices are new + mask_cached = torch.zeros(N, dtype=torch.bool) + mask_cached[idx_cached_in_x] = True + idx_new = torch.nonzero(~mask_cached).squeeze(1) + # Retrieve cached function values + sample[idx_cached_in_x] = self.y_acc[idx_cached_in_acc] + x_new = x[idx_new] + + # Compute function values for new points + if len(x_new) > 0: + if self.gp.fitted: + mean_new, cov_new = self.gp.mean_std_sub(x_new, full=True) + mean_new = mean_new.squeeze(1) + else: + mean_new = torch.zeros( + len(x_new), + ) + cov_new = self.gp.kernel( + x, + x, + ) + + # Sample truncated GP for new points + factor = torch.eye(len(x_new)) + summand = torch.zeros(len(x_new)) + cov_new = cov_new.cpu().numpy() + 1e-7 * np.eye(len(x_new)) + sample_new = tmg( + 1, + mean_new.cpu().numpy(), + cov_new, + torch.ones(len(x_new)).cpu().numpy(), + factor.cpu().numpy(), + summand.cpu().numpy(), + burn_in, + True, + ) + sample_new = torch.tensor(sample_new[0]) + + # Update sample array and caches + if self.x_acc is None: + sample = sample_new + self.x_acc = x_new + self.y_acc = sample_new + else: + sample[idx_new] = sample_new + self.x_acc = torch.cat([self.x_acc, x_new]) + self.y_acc = torch.cat([self.y_acc, sample_new]) + + self.gp.fit(self.x_acc, self.y_acc.unsqueeze(1)) + + return sample * dt From 1a091a6bf6b0de99f8560b81ba45b4eadf3cb3b9 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 13:54:31 +0100 Subject: [PATCH 31/39] optimizations: GPU sampling, kmeans subsampling, grid subsampling, gpu NMF, gpu linear interpolation and nearest neighbor, existing basis refit and new basis fit --- stpy/embeddings/optimal_positive_basis.py | 373 ++++++++++++++---- stpy/helpers/parallel_interpolation.py | 346 ++++++++++++++++ stpy/helpers/voxel_grid.py | 56 +++ .../point_processes/poisson_rate_estimator.py | 13 +- 4 files changed, 697 insertions(+), 91 deletions(-) create mode 100644 stpy/helpers/parallel_interpolation.py create mode 100644 stpy/helpers/voxel_grid.py diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index 782ab42..3ef3e85 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -1,14 +1,18 @@ -import pickle -from typing import Optional +from typing import Literal import numpy as np 
import scipy +from stpy.helpers.voxel_grid import voxel_grid +from stpy.helpers.parallel_interpolation import InterpolatorArray import torch from stpy.borel_set import BorelSet -from stpy.continuous_processes.nystrom_fea import NystromFeatures from stpy.embeddings.positive_embedding import PositiveEmbedding from stpy.kernels import KernelFunction +from sklearn.decomposition import NMF +from nmf import run_nmf +from stpy.helpers.posterior_sampling import tmg +from fast_pytorch_kmeans import KMeans class OptimalPositiveBasis(PositiveEmbedding): @@ -18,17 +22,25 @@ def __init__( *args, samples=300, discretization_size=30, - saved=False, - roi: torch.Tensor | BorelSet | None = None, - **kwargs + data: torch.Tensor | BorelSet, + fast_sampling=False, # samples using squared gaussian instead of truncated gausian + memory_limit=5, # Limits the amount of points used for optimal basis construction + sample_algorithm: Literal[ + "grid", "kmeans" + ] = "grid", # How to subsample if points are limited + **kwargs, ): # roi is the set of points that the basis is optimal for if it is a tensor # else it is the region that the basis if optimal for that will be discretized # by discretization_size. If it is not given the entire domain will be used. super().__init__(*args, **kwargs) - self.samples = np.maximum(samples, self.m) + self.sample_algorithm = sample_algorithm + self.num_samples = np.maximum(samples, self.m) + self.fast = fast_sampling + self.memory_limit = memory_limit if memory_limit is not None else 40 + self.interpolators = None - if roi is None: + if data is None: B = BorelSet( self.d, torch.tensor( @@ -36,10 +48,10 @@ def __init__( ).double(), ) self.discretized_domain = B.return_discretization(discretization_size) - elif isinstance(roi, BorelSet): - self.discretized_domain = roi.return_discretization(discretization_size) + elif isinstance(data, BorelSet): + self.discretized_domain = data.return_discretization(discretization_size) else: - self.discretized_domain = roi + self.discretized_domain = data y = self.discretized_domain[:, 0].view(-1, 1) * 0 @@ -49,43 +61,26 @@ def __init__( # gamma = self.kernel_object.gamma, d = self.kernel_object.d) self.new_kernel_object = self.kernel_object - if saved == True: - print("Did not load GP object, it needs to loaded") - else: - self.GP = NystromFeatures( - self.new_kernel_object, - m=self.m, - approx="positive_svd", - samples=self.samples, + self._fit_data(data=data) + print("Optimal basis constructed.") + if torch.sum(torch.isnan(self.embed_internal(self.discretized_domain))) > 0: + print( + "Failed basis? (zero is good):", + torch.sum(torch.isnan(self.embed_internal(self.discretized_domain))), ) - self.GP.fit_gp(self.discretized_domain, y) - print("Optimal basis constructed.") - if torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))) > 0: - print( - "Failed basis? 
(zero is good):", - torch.sum(torch.isnan(self.GP.embed(self.discretized_domain))), - ) self.precomp_integral = {} def get_m(self): return self.m - def basis_fun(self, x, j): - return self.GP.embed(x)[:, j].view(-1, 1) - def embed_internal(self, x): - out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) + out = torch.zeros([len(x), self.m], dtype=torch.float64) for j in range(self.m): out[:, j] = self.basis_fun(x, j).view(-1) return out - def save_embedding(self, filename): - filehandler = open(filename, "w") - pickle.dump(self.GP, filehandler) - - def load_embedding(self, filename): - file_pi2 = open(filename, "r") - self.GP = pickle.load(file_pi2) + def basis_fun(self, x, j): + raise Exception("Fit on data before using") def get_constraints(self): s = self.get_m() @@ -102,7 +97,7 @@ def integral(self, S): else: if S.d == 1: weights, nodes = S.return_legendre_discretization(n=256) - psi = torch.sum(torch.diag(weights) @ self.GP.embed(nodes), dim=0) + psi = torch.sum(torch.diag(weights) @ self.embed_internal(nodes), dim=0) Gamma_half = self.cov() psi = Gamma_half.T @ psi self.precomp_integral[S] = psi @@ -125,7 +120,7 @@ def cov(self, inverse=False): if self.precomp == False: x = self.discretized_domain - vals = self.GP.embed(x) + vals = self.embed_internal(x) indices = torch.argmax( vals, dim=0 ) # the nodes are the maxima of the bump functions @@ -157,6 +152,217 @@ def cov(self, inverse=False): else: return self.Gamma_half + def _sample_gaussian_prior(self, x: torch.Tensor): + n = self.num_samples + dim = len(x) + Cov = self.kernel_object.kernel(x, x) + 10e-7 * torch.eye( + dim, dtype=torch.float64 + ) + L = torch.linalg.cholesky(Cov) + if self.fast: + random_vector = torch.normal( + mean=torch.zeros(dim, n, dtype=torch.float64), std=1.0 + ) + y = torch.mm(L, random_vector) ** 2 + else: + y = torch.tensor( + tmg( + n, + np.zeros([dim], dtype=np.float64), + Cov.cpu().numpy(), + np.ones([dim], dtype=np.float64), + np.eye(dim, dtype=np.float64), + np.zeros(dim, dtype=np.float64), + verbose=True, + ), + dtype=torch.float64, + ) + return y, L + + def _sample_gaussian_conditional(self, x_old, L_old, y_old, x): + dim = len(x) # dimensionality of input + n = y_old.size(1) # number of samples + + K_new_new = self.kernel_object.kernel(x, x) + 1e-7 * torch.eye( + dim, dtype=torch.float64 + ) + K_new_old = self.kernel_object.kernel(x_old, x) + + alpha = torch.linalg.solve_triangular(L_old, y_old, upper=False) + alpha = torch.linalg.solve_triangular(L_old.T, alpha, upper=True) + + mu_star = K_new_old @ alpha # shape (dim, n) + # TODO check if kernel is always symmetric + K_old_new = K_new_old.T # shape (dim_old, dim) + + tmp = torch.linalg.solve_triangular(L_old, K_old_new, upper=False) + tmp2 = torch.linalg.solve_triangular(L_old.T, tmp, upper=True) + + Sigma_star = ( + K_new_new - (K_new_old @ tmp2) + 1e-7 * torch.eye(dim, dtype=torch.float64) + ) + + L_star = torch.linalg.cholesky(Sigma_star) + if self.fast: + random_vector_new = torch.normal( + mean=torch.zeros(dim, n, dtype=torch.float64), std=1.0 + ) + y_new = (mu_star + L_star @ random_vector_new) ** 2 + else: + y_new = torch.tensor( + tmg( + n, + mu_star.cpu().numpy(), + Sigma_star.cpu().numpy(), + np.ones([dim], dtype=np.float64), + np.eye(dim, dtype=np.float64), + np.zeros(dim, dtype=np.float64), + verbose=True, + ), + dtype=torch.float64, + ) + + return y_new + + def _subsample_if_necessary(self, x: torch.Tensor): + # Calculate number of clusters + n_clusters = (self.memory_limit * 1_000_000_000) / x.element_size() + # Since 
we want to calculate the cholesky decomp of the cov matrix of the data plus roi (expected to be 1% of data) + n_clusters = int(np.sqrt(n_clusters) * 0.99 / 2.0) + + if len(x) > n_clusters: + if self.sample_algorithm == "grid": + centroids = voxel_grid(x, approx_n_voxels=n_clusters) + print( + f"Approximated data set with {len(centroids)} points for optimal" + " basis." + ) + return centroids + elif self.sample_algorithm == "kmeans": + # Calculate maximum size of mini batch + n_samples, n_features = x.shape + SAFETY_FACTOR = 1.5 + max_batch_size = int( + ( + self.memory_limit * 1_000_000_000 + - 0.8 * n_samples + - 2 * n_clusters * n_features * x.element_size() + ) + // ( + ( + n_features * n_clusters * x.element_size() + + n_features * x.element_size() + ) + * SAFETY_FACTOR + ) + ) + if max_batch_size >= n_samples: + max_batch_size = None + + print( + f"Approximating data set with {n_clusters} points from" + f" {len(x)} points for optimal basis." + + ( + f"Using batch size {max_batch_size}" + if max_batch_size is not None + else "" + ) + ) + kmeans = KMeans( + n_clusters=n_clusters, + mode="euclidean", + verbose=1, + minibatch=max_batch_size, + ) + kmeans.fit_predict(x) + centroids = kmeans.centroids + + return centroids + else: + return x + + def _fit_data(self, data): + self.data_m = self.m + data = self._subsample_if_necessary(data) + self.F_data, self.L_data = self._sample_gaussian_prior(data) + self.F_data = self.F_data**2 + self.W_data, self.H_data, err = run_nmf( + self.F_data, + n_components=self.m, + tol=1e-12, + use_gpu=True, + batch_max_iter=2000, + fp_precision=self.F_data.dtype, + ) + self.W_data = torch.tensor(self.W_data) + self.H_data = torch.tensor(self.H_data) + self.W_data = self.W_data / torch.linalg.norm(self.W_data, dim=0) + self.data = data + W_norm = self.W_data + self._set_interpolators(data, W_norm) + + def basis_fun(self, q: torch.Tensor, j: int): + if self.interpolators is None: + raise Exception("Fit on data before using") + + return self.interpolators(j, q) + + def _set_interpolators(self, x: torch.Tensor, phi: torch.Tensor): + assert x.dtype == phi.dtype + self.interpolators = InterpolatorArray(x, phi, self.m) + + def fit(self, roi: torch.Tensor): + assert self.data is not None, "Data must be given first" + print("Refitting optimal basis") + self.precomp = False + x = torch.cat((self.data, roi), dim=0) + F, _ = self._sample_gaussian_prior(x) + F = F**2 + # Note: using cpu based NMF here since run_nmf has no way to pass initialization + model = NMF(n_components=self.data_m, max_iter=200, tol=1e-8, init="custom") + phi_roi_init = torch.zeros([len(roi), self.data_m], dtype=torch.float64) + W_start = torch.cat((self.W_data, phi_roi_init), dim=0) + W = torch.tensor( + model.fit_transform( + F.cpu().numpy(), + W=W_start.cpu().numpy(), + H=self.H_data.cpu().numpy(), + ) + ) + self.Phi = W / torch.linalg.norm(W, dim=0) + self.m = self.data_m + self._set_interpolators(x, self.Phi) + self.precomp = False + self.precomp_integral = {} + + def add_new_functions(self, roi: torch.Tensor, n: int): + x = torch.cat((self.data, roi), dim=0) + F_new = self._sample_gaussian_conditional( + self.data, self.L_data, self.F_data, roi + ) + F = torch.cat([self.F_data, F_new]) + Phi_old = ( + torch.stack([self.basis_fun(x, j) for j in range(self.data_m)]).squeeze(2).T + ) + Theta_old = self.H_data + # TODO, theoretically this is wrong and we would have to solve over both Phi_old and Phi_new + # also, caping at 0 has no theoretical underpinning + objective = torch.clamp(F - Phi_old @ 
Theta_old, min=0) + Phi_new, Theta_new, err = run_nmf( + objective, + n_components=n, + tol=1e-8, + use_gpu=True, + batch_max_iter=600, + fp_precision=objective.dtype, + ) + Phi_new = torch.tensor(Phi_new) + self.Phi = Phi_new / torch.linalg.norm(Phi_new, dim=0) + self.m = self.data_m + n + self.interpolators.add(x, self.Phi, n) + self.precomp = False + self.precomp_integral = {} + if __name__ == "__main__": @@ -166,65 +372,58 @@ def cov(self, inverse=False): from scipy.interpolate import griddata d = 2 - m = 64 + m = 5 n = 64 - N = 20 - sqrtbeta = 2 s = 0.01 b = 0 gamma = 0.5 k = KernelFunction(gamma=gamma, d=2) - Emb = OptimalPositiveBasis( - d, m, offset=0.2, s=s, b=b, discretization_size=n, B=1000.0, kernel_object=k - ) - - GP = GaussianProcess(d=d, s=s) xtest = torch.tensor(interval(n, d)) - x = torch.tensor(np.random.uniform(-1, 1, size=(N, d))) - - F_true = lambda x: torch.sum(torch.sin(x) ** 2 - 0.1, dim=1).view(-1, 1) - F = lambda x: F_true(x) + s * torch.randn(x.size()[0]).view(-1, 1).double() - y = F(x) - - # Try to plot the basis functions - msqrt = int(np.sqrt(m)) - fig, axs = plt.subplots(msqrt, msqrt, figsize=(15, 7)) - for i in range(m): - f_i = Emb.basis_fun(xtest, i) ## basis function - xx = xtest[:, 0].cpu().numpy() - yy = xtest[:, 1].cpu().numpy() - ax = axs[int(i // msqrt), (i % msqrt)] - grid_x, grid_y = np.mgrid[min(xx) : max(xx) : 100j, min(yy) : max(yy) : 100j] - grid_z_f = griddata( - (xx, yy), f_i[:, 0].detach().numpy(), (grid_x, grid_y), method="linear" - ) - cs = ax.contourf(grid_x, grid_y, grid_z_f, levels=10) - ax.contour(cs, colors="k") - # cbar = fig.colorbar(cs) - # if self.x is not None: - # ax.scatter(self.x[:, 0].detach().numpy(), self.x[:, 1].detach().numpy(), c='r', s=100, marker="o") - ax.grid(c="k", ls="-", alpha=0.1) - - plt.savefig("positive.png") - plt.show() - - Emb.fit(x, y) - GP.fit_gp(x, y) + xnew = xtest[:1000] - mu, _ = Emb.mean_std(xtest) - mu_true, _ = GP.mean_std(xtest) + xtest = xtest[1000:] - Emb.visualize_function( - xtest, [F_true, lambda x: GP.mean_std(x)[0], lambda x: Emb.mean_std(x)[0]] + Emb = OptimalPositiveBasis( + d, + m, + offset=0.2, + s=s, + b=b, + discretization_size=n, + B=1000.0, + kernel_object=k, + data=xtest, ) - # Emb.visualize_function(xtest,GP.mean_std) - # Emb.visualize_function(xtest,Emb.mean_std) - # plt.plot(xtest,mu_true,'b--', label = 'GP') + y, L = Emb._sample_prior(xtest, 1) + + fig, ax = plt.subplots(figsize=(10, 6)) + xx = xtest[:, 0].cpu().numpy() + yy = xtest[:, 1].cpu().numpy() + sc = ax.scatter(xx, yy, c=y.detach().numpy().reshape(-1), cmap="viridis") + ax.grid(c="k", ls="-", alpha=0.1) + plt.colorbar(sc) + plt.title("Interpolated plot of y over xtest") + plt.xlabel("x1") + plt.ylabel("x2") + plt.show() - # plt.plot(x,y,'ro') - # plt.plot(xtest, mu, 'g-', label = 'positive basis ') - # plt.legend() + ynew = Emb._sample_conditional(xtest, L, y, xnew) + + xtest = torch.cat([xtest, xnew]) + y = torch.cat([y, ynew]) + + fig, ax = plt.subplots(figsize=(10, 6)) + xx = xtest[:, 0].cpu().numpy() + yy = xtest[:, 1].cpu().numpy() + sc = ax.scatter(xx, yy, c=y.detach().numpy().reshape(-1), cmap="viridis") + ax.grid(c="k", ls="-", alpha=0.1) + plt.colorbar(sc) + plt.title("Interpolated plot of y over xtest") + plt.xlabel("x1") + plt.ylabel("x2") plt.show() + + print("hi") diff --git a/stpy/helpers/parallel_interpolation.py b/stpy/helpers/parallel_interpolation.py new file mode 100644 index 0000000..6967801 --- /dev/null +++ b/stpy/helpers/parallel_interpolation.py @@ -0,0 +1,346 @@ +from typing import overload 
+from torch.multiprocessing import Pool, set_start_method +from os import cpu_count +import line_profiler +import torch +import numpy as np +from scipy.spatial import Delaunay, cKDTree + +shared_triangulation: Delaunay | None +xtree: cKDTree + + +def _initialize(tri: Delaunay, tree: cKDTree): + global shared_triangulation + global xtree + shared_triangulation = tri + xtree = tree + + +def _find_exact_or_simplex_batch(batch: np.ndarray): + distances, idx = xtree.query(batch, k=1, distance_upper_bound=1e-7) + exact_match_mask = distances <= 1e-7 + batch_remaining = batch[~exact_match_mask] + + simplices = shared_triangulation.find_simplex(batch_remaining) + outside_conv_hull_mask = simplices < 0 + + simplices_remaining = simplices[~outside_conv_hull_mask] + exact_match_mask[~exact_match_mask] = outside_conv_hull_mask + + idx = idx[exact_match_mask] + if len(simplices_remaining) > 0: + no_match_mask = idx == len(xtree.data) + if no_match_mask.any(): + _, idx_no_match = xtree.query(batch[exact_match_mask][no_match_mask], k=1) + idx[no_match_mask] = idx_no_match + + return idx, simplices_remaining, exact_match_mask + + +class InterpolatorArray: + + def __init__(self, x: torch.Tensor, phi: torch.Tensor, m: int, num_cpu_cores=None): + # Ensure x is on CPU for Delaunay + x_cpu = x.cpu().numpy() + + # Build the Delaunay triangulation on CPU + tri = Delaunay(x_cpu) + xtree = cKDTree(x_cpu) + if num_cpu_cores is None: + num_cpu_cores = cpu_count() + self.num_cpu_cores = num_cpu_cores + pool = Pool(num_cpu_cores, _initialize, [tri, xtree]) + self.interpolators = [ + InterpolatorND(x, phi[:, j], tri, xtree, pool, num_cpu_cores) + for j in range(m) + ] + self.pools = [pool] + + def __call__(self, j: int, q: torch.Tensor): + return self.interpolators[j](q).view(-1, 1) + + def add(self, x: torch.Tensor, phi: torch.Tensor, m: int): + x_cpu = x.cpu().numpy() + tri = Delaunay(x_cpu) + xtree = cKDTree(x_cpu) + pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree]) + self.interpolators.extend( + [ + InterpolatorND(x, phi[:, j], tri, xtree, pool, self.num_cpu_cores) + for j in range(m) + ] + ) + self.pools.append(pool) + + def __del__(self): + for pool in self.pools: + pool.close() + pool.join() + + +class InterpolatorND: + """ + Piecewise linear interpolator for N-dimensional data using Delaunay triangulation. + """ + + def __init__( + self, + x: torch.Tensor, + y: torch.Tensor, + tri=None, + xtree=None, + pool=None, + num_cpu_cores=None, + ): + """ + Args: + x: (N, D) tensor of input points in D-dimensional space. + y: (N,) tensor of function values at those points. + tri: Precomputed Delaunay triangulation. + xtree: Precomputed cKDTree for nearest neighbor search. + pool: Optional multiprocessing pool. + num_cpu_cores: Number of CPU cores to use for parallel processing. 
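+
+        Note:
+            Query points that fall outside the convex hull of x are assigned the
+            value of their nearest data point rather than being interpolated.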
+ """ + assert y.dtype == torch.float64 + + if tri is not None: + # Use the provided Delaunay triangulation and cKDTree + self.tri = tri + self.xtree = xtree + self.pool = pool + self.own_pool = False + self.num_cpu_cores = num_cpu_cores + else: + # Ensure x is on CPU for Delaunay + x_cpu = x.detach().cpu().numpy() + + # Build the Delaunay triangulation on CPU + self.tri = Delaunay(x_cpu) + xtree = cKDTree(x_cpu) + + if num_cpu_cores is None: + num_cpu_cores = cpu_count() + self.num_cpu_cores = num_cpu_cores + self.pool = Pool(num_cpu_cores, _initialize, [self.tri, xtree]) + self.own_pool = True + + self.x = x + self.y = y + + # Convert triangle simplices to a torch tensor + simplices = torch.tensor(self.tri.simplices, device=y.device) + self.simplices = simplices # Shape: (M, D+1), M = # of simplices + + # Gather simplex vertex positions and function values + self.tri_pts = x[simplices] # Shape: (M, D+1, D) + self.tri_y = y[simplices] # Shape: (M, D+1) + + # Precompute matrices for barycentric transformation + v0 = self.tri_pts[:, 0, :] # First vertex of each simplex + T = self.tri_pts[:, 1:, :] - v0[:, None, :] # (M, D, D) + T = T.transpose(-1, -2) + self.T_inv = torch.inverse(T) # (M, D, D) + self.v0 = v0 # Store v0 for barycentric computation + + def __del__(self): + if self.own_pool: + self.pool.close() + self.pool.join() + + def __call__(self, xp: torch.Tensor) -> torch.Tensor: + """ + Interpolate y-values at query points xp. Does not keep order of points the same! + + Args: + xp: (B, D) tensor of query points in D-dimensional space. + + Returns: + out: (B,) tensor of interpolated values. + """ + + xp_cpu = xp.cpu().numpy() # (B, D) + + # 1) Use Delaunay.find_simplex on CPU to find simplices + # simplex_idx = self.tri.find_simplex(xp_cpu) # (B,) + + # Split xp_cpu into batches for parallel processing + batches = np.array_split(xp_cpu, self.num_cpu_cores) + # Use multiprocessing to parallelize find_simplex + results = self.pool.map_async( + _find_exact_or_simplex_batch, [batch for batch in batches] + ).get(timeout=10) + # Concatenate the results back into a single array + # results = [(out_exact_matches0, xp0, simplices0), (out_exact_matches1, xp1, simplices1), ...] 
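+        # Each worker returns a tuple (exact_match_indices, interior_simplex_ids, exact_match_mask).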
+ exact_matches_idx_list = [] + exact_match_mask_list = [] + simplices_list = [] + for exact_matches_idx, simplices, exact_match_mask in results: + exact_matches_idx_list.append(exact_matches_idx) + exact_match_mask_list.append(exact_match_mask) + simplices_list.append(simplices) + + exact_matches_idx = np.concatenate(exact_matches_idx_list) + exact_matches_idx = torch.tensor(exact_matches_idx) + exact_matches_y = self.y[exact_matches_idx] + if len(exact_matches_y) == len(xp): + return exact_matches_y + + exact_match_mask = np.concatenate(exact_match_mask_list) + exact_match_mask = torch.tensor(exact_match_mask) + + simplices_remaining = np.concatenate(simplices_list) + simplices_remaining = torch.tensor(simplices_remaining) # (B,) + + xp_remaining = xp[~exact_match_mask] + + # p - v0: (Bv, D) + p_v0 = xp_remaining - self.v0[simplices_remaining] + + # alpha = T_inv @ (p - v0): (Bv, D) + T_inv_local = self.T_inv[simplices_remaining] # (Bv, D, D) + # Batched Matrix multiplication, but T_inv_local is transposed + bary_coords = torch.einsum("bij, bj -> bi", T_inv_local, p_v0) # (Bv, D) + + # Compute last barycentric coordinate + bary_coords = torch.cat( + [1 - bary_coords.sum(dim=-1, keepdim=True), bary_coords], dim=-1 + ) # (Bv, D+1) + + # 5) Interpolate y-values using barycentric coordinates + tri_y_local = self.tri_y[simplices_remaining] # (Bv, D+1) + out_interpolated = (bary_coords * tri_y_local).sum(dim=-1) # (Bv,) + + # 6) Store results for valid points + result = torch.empty(len(xp), dtype=self.y.dtype, device=self.y.device) + result[exact_match_mask] = exact_matches_y + result[~exact_match_mask] = out_interpolated + return result + + +def plot_simple_function(): + + # Define a simple 2D function + def simple_function(x, y): + return np.sin(np.pi * x) * np.cos(np.pi * y) + + # Generate a grid of points for the original function + n_points = 21 # Number of points along each axis + x_vals = np.linspace(0, 1, n_points) + y_vals = np.linspace(0, 1, n_points) + x_grid, y_grid = np.meshgrid(x_vals, y_vals) + z_grid = simple_function(x_grid, y_grid) # Compute function values + + # Flatten the grid for input to the interpolator + x_flat = x_grid.flatten() + y_flat = y_grid.flatten() + z_flat = z_grid.flatten() + + # Convert to PyTorch tensors + x_train = torch.tensor( + np.column_stack((x_flat, y_flat)), dtype=torch.float64, device="cuda" + ) + + y_train = torch.tensor(z_flat, dtype=torch.float64, device="cuda") + + # Create the interpolator + interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=1) + + # Generate slightly offset query points + n_query = 21 + x_query_vals = np.linspace(0.010, 1.01, n_query) + y_query_vals = np.linspace(0.010, 1.01, n_query) + # x_query_vals = np.array([0.31]) + # y_query_vals = np.array([0.01]) + x_query_grid, y_query_grid = np.meshgrid(x_query_vals, y_query_vals) + x_query_grid = np.concat( + [np.linspace(0.0, 1.0, n_query).reshape(1, -1), x_query_grid] + ) + y_query_grid = np.concat([np.zeros([1, n_query]), y_query_grid]) + x_query_flat = x_query_grid.flatten() + y_query_flat = y_query_grid.flatten() + + # Convert query points to PyTorch tensors + x_query = torch.tensor( + np.column_stack((x_query_flat, y_query_flat)), + dtype=torch.float64, + device="cuda", + ) + + # Perform interpolation + z_query = interpolator(x_query).cpu().numpy() # Interpolated values + + # Plot the original function as a scatter plot + plt.figure(figsize=(30, 10)) + plt.subplot(1, 2, 1) + plt.scatter(x_flat, y_flat, c=z_flat, cmap="viridis", s=40) + plt.title("Original 
Function") + plt.colorbar() + plt.subplot(1, 2, 2) + plt.scatter(x_query_flat, y_query_flat, c=z_query, cmap="viridis", s=200) + plt.scatter( + x_flat, + y_flat, + c=z_flat, + cmap="viridis", + s=200, + ) + plt.title("Interpolated Function with Original Points") + plt.colorbar() + # Show plots + plt.tight_layout() + plt.show() + print("hi") + + +def interploate_between(): + + # Define 5 points in a 1x1 field + x_points = np.array([[0.1, 0.1], [1, 0], [0, 1], [1, 1], [0.3, 0.3], [0.7, 0.7]]) + # Add random noise to x_points + noise = np.random.normal(scale=0.01, size=x_points.shape) + # x_points += noise + y_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + + # Convert to PyTorch tensors + x_train = torch.tensor(x_points, dtype=torch.float64, device="cuda") + y_train = torch.tensor(y_values, dtype=torch.float64, device="cuda") + + # Create the interpolator + interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=1) + + # Generate a grid of query points + n_query = 10 # Number of query points along each axis + x_query_vals = np.linspace(0, 1, n_query) + y_query_vals = np.linspace(0, 1, n_query) + x_query_grid, y_query_grid = np.meshgrid(x_query_vals, y_query_vals) + x_query_flat = x_query_grid.flatten() + y_query_flat = y_query_grid.flatten() + + # Convert query points to PyTorch tensors + x_query = torch.tensor( + np.column_stack((x_query_flat, y_query_flat)), + dtype=torch.float64, + device="cuda", + ) + + # Perform interpolation + z_query = interpolator(x_query).cpu().numpy() # Interpolated values + + # Plot the interpolated values + plt.figure(figsize=(10, 10)) + plt.scatter(x_query_flat, y_query_flat, c=z_query, cmap="viridis", s=40) + plt.scatter(x_points[:, 0], x_points[:, 1], c=y_values, cmap="viridis", s=200) + plt.title("Interpolated Values") + plt.colorbar() + plt.show() + print("hi") + + +if __name__ == "__main__": + + import torch + import numpy as np + import matplotlib.pyplot as plt + + interploate_between() diff --git a/stpy/helpers/voxel_grid.py b/stpy/helpers/voxel_grid.py new file mode 100644 index 0000000..fdcb28f --- /dev/null +++ b/stpy/helpers/voxel_grid.py @@ -0,0 +1,56 @@ +from typing import List, Optional, Union + + +import torch +from torch import Tensor + +from torch_cluster import grid_cluster + + +def _calculate_voxel_size(x: Tensor, n_voxels: int) -> float: + data_range = x.max(dim=0).values - x.min(dim=0).values + total_volume = torch.prod(data_range) + voxel_volume = total_volume / n_voxels + voxel_size = voxel_volume ** (1 / x.shape[1]) + return voxel_size.item() + + +def voxel_grid( + x: Tensor, + size: Union[float, Tensor, None] = None, + approx_n_voxels: int | None = None, +) -> Tensor: + # approx_n_voxels is only correct if the input domain is a (hyper) cube + # in every other case the result will either be more or less + + if size is None: + assert approx_n_voxels is not None, "One of size, n_voxels must be given" + size = _calculate_voxel_size(x, approx_n_voxels) + + if isinstance(size, float): + size = torch.full([x.shape[1]], size) + indices = grid_cluster(x, size).unsqueeze(1).expand(-1, x.shape[1]) + out = torch.full( + [indices.max() + 1, x.shape[1]], torch.nan, dtype=x.dtype, device=x.device + ) + averaged = out.scatter_reduce(0, indices, x, reduce="mean", include_self=False) + return averaged[~torch.isnan(averaged).any(dim=1)] + + +if __name__ == "__main__": + + # Example usage of voxel_grid + x = torch.tensor( + [ + [0.1, 0.2, 0.3], + [2, 0, 0], + [0, 2, 0], + [0, 0, 2], + [2.1, 2.2, 2.3], + [3, 3, 3], + ] + ) + size = 1.0 + + result = 
voxel_grid(x, approx_n_voxels=3) + print(result) diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index ca20358..720aa22 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -61,6 +61,7 @@ def __init__( optimization_library="torch", roi: torch.Tensor | BorelSet | None = None, roi_discretization: int = 30, + memory_limit=None, ): self.d = d """ Dimension of the data """ @@ -162,8 +163,9 @@ def __init__( offset=offset, s=np.sqrt(jitter), samples=samples_nystrom, - roi=roi, + data=roi, discretization_size=roi_discretization, + memory_limit=memory_limit, ) elif basis == "custom": assert embedding is not None @@ -231,8 +233,7 @@ def add_data_point(self, new_data, times=True): where data_points is a 2d tensor, with number of columns equal to d and number of rows equal to the number of point observations - It triggers a re-fitting of the approximation parameters $\hat \theta$ - and adds + It adds - the integral over the sensing area plus the log of the integral over the sensing area if the data is of type histogram - the integral over the sensing are plus the sum of the rate function at the datapoints if the data is of type count-record @@ -1467,7 +1468,11 @@ def penalized_likelihood(self, threads=None): prob = cp.Problem(objective, constraints) if self.rate is not None: - theta.value = self.rate.cpu().numpy() + theta.value = ( + torch.cat([self.rate, torch.zeros([self.get_m() - len(self.rate)])]) + .cpu() + .numpy() + ) try: prob.solve( From 7696ad7b6847eda9d304a82d0280f4ae193f298c Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 13:54:44 +0100 Subject: [PATCH 32/39] spelling fix --- stpy/embeddings/positive_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index b19f208..c523e35 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -64,7 +64,7 @@ def __init__( ) self.mu = None self.precomp = False - self.procomp_integrals = {} + self.precomp_integral = {} def get_size(self): return self.m**self.d From 48d6abe42aabccc8204783df442d610fac7e0aa1 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 14:25:22 +0100 Subject: [PATCH 33/39] voxel size binary search --- stpy/embeddings/optimal_positive_basis.py | 5 ++-- stpy/helpers/voxel_grid.py | 31 ++++++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index 3ef3e85..ccbf774 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -23,7 +23,7 @@ def __init__( samples=300, discretization_size=30, data: torch.Tensor | BorelSet, - fast_sampling=False, # samples using squared gaussian instead of truncated gausian + fast_sampling=True, # samples using squared gaussian instead of truncated gausian memory_limit=5, # Limits the amount of points used for optimal basis construction sample_algorithm: Literal[ "grid", "kmeans" @@ -232,7 +232,7 @@ def _subsample_if_necessary(self, x: torch.Tensor): if len(x) > n_clusters: if self.sample_algorithm == "grid": - centroids = voxel_grid(x, approx_n_voxels=n_clusters) + centroids = voxel_grid(x, max_n_voxels=n_clusters) print( f"Approximated data set with {len(centroids)} points for optimal" " basis." 
@@ -279,6 +279,7 @@ def _subsample_if_necessary(self, x: torch.Tensor): return centroids else: + print("No subsampling necessary because data fits into memory") return x def _fit_data(self, data): diff --git a/stpy/helpers/voxel_grid.py b/stpy/helpers/voxel_grid.py index fdcb28f..b4c309e 100644 --- a/stpy/helpers/voxel_grid.py +++ b/stpy/helpers/voxel_grid.py @@ -7,25 +7,32 @@ from torch_cluster import grid_cluster -def _calculate_voxel_size(x: Tensor, n_voxels: int) -> float: - data_range = x.max(dim=0).values - x.min(dim=0).values - total_volume = torch.prod(data_range) - voxel_volume = total_volume / n_voxels - voxel_size = voxel_volume ** (1 / x.shape[1]) - return voxel_size.item() +def _get_n_voxels(x, size: float): + size = torch.full([x.shape[1]], size) + indices = grid_cluster(x, size) + return indices.unique().numel() def voxel_grid( x: Tensor, size: Union[float, Tensor, None] = None, - approx_n_voxels: int | None = None, + max_n_voxels: int | None = None, ) -> Tensor: - # approx_n_voxels is only correct if the input domain is a (hyper) cube - # in every other case the result will either be more or less + # Do binary search to find the right voxel size that yields <= max_n_voxels if size is None: - assert approx_n_voxels is not None, "One of size, n_voxels must be given" - size = _calculate_voxel_size(x, approx_n_voxels) + assert max_n_voxels is not None, "One of size, n_voxels must be given" + max_size = (x.max(dim=0).values - x.min(dim=0).values).max().item() + tol = max_size / 1e7 + low, high = 0, max_size + while high - low > tol: + mid = (low + high) / 2 + n_voxels = _get_n_voxels(x, mid) + if n_voxels > max_n_voxels: + low = mid + else: + high = mid + size = high if isinstance(size, float): size = torch.full([x.shape[1]], size) @@ -52,5 +59,5 @@ def voxel_grid( ) size = 1.0 - result = voxel_grid(x, approx_n_voxels=3) + result = voxel_grid(x, max_n_voxels=3) print(result) From 9c2775255d04629fe040b028e3c99a6cf66ddb3d Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 18:14:16 +0100 Subject: [PATCH 34/39] add new dependencies --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 3b39c8b..8e0e9fe 100755 --- a/setup.py +++ b/setup.py @@ -14,6 +14,10 @@ "quadprog", "cvxpylayers", "autograd_minimize", + "torch-cluster", + "nmf-torch", + "fast-pytorch-kmeans", + "tqdm", ] # setup( @@ -27,4 +31,5 @@ packages=["stpy"], zip_safe=False, install_requires=packages, + setup_requires=["torch", "Cython"], ) From dda4e0687eeaf9d0922e5afd1f13464087b45e89 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sun, 2 Feb 2025 18:16:30 +0100 Subject: [PATCH 35/39] use same device and data type class-wide --- stpy/helpers/parallel_interpolation.py | 12 ++--- .../point_processes/poisson_rate_estimator.py | 49 ++++++++++--------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/stpy/helpers/parallel_interpolation.py b/stpy/helpers/parallel_interpolation.py index 6967801..9089e3a 100644 --- a/stpy/helpers/parallel_interpolation.py +++ b/stpy/helpers/parallel_interpolation.py @@ -1,7 +1,5 @@ -from typing import overload -from torch.multiprocessing import Pool, set_start_method +from torch.multiprocessing import Pool from os import cpu_count -import line_profiler import torch import numpy as np from scipy.spatial import Delaunay, cKDTree @@ -181,16 +179,18 @@ def __call__(self, xp: torch.Tensor) -> torch.Tensor: simplices_list.append(simplices) exact_matches_idx = np.concatenate(exact_matches_idx_list) - exact_matches_idx = 
torch.tensor(exact_matches_idx) + exact_matches_idx = torch.tensor(exact_matches_idx, device=self.y.device) exact_matches_y = self.y[exact_matches_idx] if len(exact_matches_y) == len(xp): return exact_matches_y exact_match_mask = np.concatenate(exact_match_mask_list) - exact_match_mask = torch.tensor(exact_match_mask) + exact_match_mask = torch.tensor(exact_match_mask, device=self.y.device) simplices_remaining = np.concatenate(simplices_list) - simplices_remaining = torch.tensor(simplices_remaining) # (B,) + simplices_remaining = torch.tensor( + simplices_remaining, device=self.y.device + ) # (B,) xp_remaining = xp[~exact_match_mask] diff --git a/stpy/point_processes/poisson_rate_estimator.py b/stpy/point_processes/poisson_rate_estimator.py index 720aa22..51ae795 100644 --- a/stpy/point_processes/poisson_rate_estimator.py +++ b/stpy/point_processes/poisson_rate_estimator.py @@ -62,6 +62,8 @@ def __init__( roi: torch.Tensor | BorelSet | None = None, roi_discretization: int = 30, memory_limit=None, + device=torch.get_default_device(), + dtype=torch.get_default_dtype(), ): self.d = d """ Dimension of the data """ @@ -91,6 +93,8 @@ def __init__( else: self.beta = lambda t: beta self.var_cor_on = var_cor_on + self.device = device + self.dtype = dtype if basis == "triangle": self.packing = TriangleEmbedding( @@ -314,7 +318,6 @@ def fit_gp( self, threads=4, optimization_library=None, - device: torch.device = torch.get_default_device(), ): optimization_library = ( optimization_library @@ -329,7 +332,7 @@ def fit_gp( if optimization_library == "cvxpy": self.penalized_likelihood(threads=threads) elif optimization_library == "torch": - self.penalized_likelihood_fast(device=device) + self.penalized_likelihood_fast() else: raise NotImplementedError( "The optimization method does not exist" @@ -436,7 +439,7 @@ def sample_mirror_langevin(self, steps=500, verbose=False): @ (S @ torch.tanh(y) + v) ) - y = torch.rand(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True) + y = torch.rand(size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True) # initiallize with map sqeezed more y.data = Gamma_half @ self.rate.view(-1, 1) # u < theta < l @@ -675,7 +678,7 @@ def prox(x): self.b + 0.05 * torch.rand( - size=(self.get_m(), 1), dtype=torch.float64, requires_grad=False + size=(self.get_m(), 1), dtype=self.dtype, requires_grad=False ).view(-1, 1) ** 2 ) @@ -814,7 +817,7 @@ def prox(x): # hessian = lambda y: self.construct_covariance_matrix_laplace() y = prox( - torch.randn(size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True) + torch.randn(size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True) ) y.data = self.rate.view(-1, 1) @@ -906,7 +909,7 @@ def sample_hessian_positive_langevin(self, steps=500, verbose=False, stepsize=No self.b + 0.05 * torch.rand( - size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True ).view(-1) ** 2 ) @@ -1006,7 +1009,7 @@ def sample_mla_prime(self, steps=100, verbose=False, stepsize=None): self.b + 0.05 * torch.rand( - size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True ).reshape(-1, 1) ** 2 ) @@ -1093,7 +1096,7 @@ def sample_hessian_positive_langevin_2( y = ( torch.rand( - size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True ).view(-1) ** 2 ) @@ -1170,7 +1173,7 @@ def sample_newton_langevin(self, steps=1000, stepsize=None, verbose=False): 
y = ( 0.05 * torch.rand( - size=(self.get_m(), 1), dtype=torch.float64, requires_grad=True + size=(self.get_m(), 1), dtype=self.dtype, requires_grad=True ).view(-1, 1) ** 2 ) @@ -1313,22 +1316,20 @@ def sampled_lcb_ucb(self, xtest, samples=100, delta=0.1): ucb = torch.quantile(paths, 1 - delta, dim=0) return lcb, ucb - def penalized_likelihood_fast( - self, device: torch.device = torch.get_default_device() - ): + def penalized_likelihood_fast(self): l, Lambda, u = self.get_constraints() # assert torch.allclose(Lambda, torch.eye(self.m**self.d)) Gamma_half, invGamma_half = self.cov(inverse=True) - invGamma_half = invGamma_half.to(device) + invGamma_half = invGamma_half.to(self.device) s = self.s * 0.5 if self.dual == False: - p = self.phis.to(device) @ invGamma_half + p = self.phis.to(self.device) @ invGamma_half # using all points without anchor points if self.observations is not None: - o = self.observations.to(device) @ invGamma_half + o = self.observations.to(self.device) @ invGamma_half def objective(theta): return ( @@ -1348,12 +1349,12 @@ def objective(theta): # using anchor points mask = self.bucketized_counts > 0 phis = self.varphis[mask, :] - tau = self.total_bucketized_time[mask].to(device) + tau = self.total_bucketized_time[mask].to(self.device) p = phis @ invGamma_half if self.observations is not None: - observations = self.anchor_points_emb.to(device) - weights = self.anchor_weights.to(device) + observations = self.anchor_points_emb.to(self.device) + weights = self.anchor_weights.to(self.device) mask = weights > 0.0 o = observations[mask, :] @ invGamma_half @@ -1377,8 +1378,12 @@ def objective(theta): ) if isinstance(self.rate, torch.Tensor): - theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() - theta0.data = self.rate.data + theta0 = torch.cat( + [ + self.rate.to(self.device), + torch.zeros([self.get_m() - len(self.rate)], device=self.device), + ] + ) else: theta0 = torch.zeros(size=(self.get_m(), 1)).view(-1).double() @@ -1391,7 +1396,7 @@ def objective(theta): bounds=(l[0] + eps, u[0]), precision="float64", tol=1e-8, - torch_device=str(device), + torch_device=str(self.device), options={ "ftol": 1e-08, "gtol": 1e-08, @@ -1402,7 +1407,7 @@ def objective(theta): }, ) - self.rate = invGamma_half @ torch.tensor(res.x) + self.rate = invGamma_half @ torch.tensor(res.x, device=self.device) print(res.message) return self.rate From 1919237423795cbc08d6bf113f820b2e8425d649 Mon Sep 17 00:00:00 2001 From: peibensteine Date: Tue, 4 Feb 2025 10:16:28 +0100 Subject: [PATCH 36/39] enable single process interpolation --- stpy/helpers/parallel_interpolation.py | 98 ++++++++++++++++---------- 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/stpy/helpers/parallel_interpolation.py b/stpy/helpers/parallel_interpolation.py index 9089e3a..c070333 100644 --- a/stpy/helpers/parallel_interpolation.py +++ b/stpy/helpers/parallel_interpolation.py @@ -15,23 +15,26 @@ def _initialize(tri: Delaunay, tree: cKDTree): xtree = tree -def _find_exact_or_simplex_batch(batch: np.ndarray): - distances, idx = xtree.query(batch, k=1, distance_upper_bound=1e-7) +def _find_exact_or_simplex_batch(batch: np.ndarray, tri_local=None, xtree_local=None): + if tri_local is None: + tri_local = shared_triangulation + if xtree_local is None: + xtree_local = xtree + distances, idx = xtree_local.query(batch, k=1, distance_upper_bound=1e-7) exact_match_mask = distances <= 1e-7 batch_remaining = batch[~exact_match_mask] - simplices = shared_triangulation.find_simplex(batch_remaining) + simplices = 
tri_local.find_simplex(batch_remaining) outside_conv_hull_mask = simplices < 0 simplices_remaining = simplices[~outside_conv_hull_mask] exact_match_mask[~exact_match_mask] = outside_conv_hull_mask idx = idx[exact_match_mask] - if len(simplices_remaining) > 0: - no_match_mask = idx == len(xtree.data) - if no_match_mask.any(): - _, idx_no_match = xtree.query(batch[exact_match_mask][no_match_mask], k=1) - idx[no_match_mask] = idx_no_match + no_match_mask = idx == len(xtree_local.data) + if no_match_mask.any(): + _, idx_no_match = xtree_local.query(batch[exact_match_mask][no_match_mask], k=1) + idx[no_match_mask] = idx_no_match return idx, simplices_remaining, exact_match_mask @@ -48,33 +51,44 @@ def __init__(self, x: torch.Tensor, phi: torch.Tensor, m: int, num_cpu_cores=Non if num_cpu_cores is None: num_cpu_cores = cpu_count() self.num_cpu_cores = num_cpu_cores - pool = Pool(num_cpu_cores, _initialize, [tri, xtree]) - self.interpolators = [ - InterpolatorND(x, phi[:, j], tri, xtree, pool, num_cpu_cores) - for j in range(m) - ] - self.pools = [pool] + if self.num_cpu_cores >= 1: + pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree]) + else: + pool = None + self.interpolators = { + 0: [ + InterpolatorND(x, phi[:, j], tri, xtree, pool, num_cpu_cores) + for j in range(m) + ] + } + self.pools = {0: pool} def __call__(self, j: int, q: torch.Tensor): - return self.interpolators[j](q).view(-1, 1) + all_interpolators = [ip for list in self.interpolators.values() for ip in list] + return all_interpolators[j](q).view(-1, 1) - def add(self, x: torch.Tensor, phi: torch.Tensor, m: int): + def set(self, i: int, x: torch.Tensor, phi: torch.Tensor, m: int): x_cpu = x.cpu().numpy() tri = Delaunay(x_cpu) xtree = cKDTree(x_cpu) - pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree]) - self.interpolators.extend( - [ - InterpolatorND(x, phi[:, j], tri, xtree, pool, self.num_cpu_cores) - for j in range(m) - ] - ) - self.pools.append(pool) + if self.num_cpu_cores >= 1: + pool = Pool(self.num_cpu_cores, _initialize, [tri, xtree]) + else: + pool = None + self.interpolators[i] = [ + InterpolatorND(x, phi[:, j], tri, xtree, pool, self.num_cpu_cores) + for j in range(m) + ] + if i in self.pools and self.pools[i] is not None: + self.pools[i].close() + self.pools[i].join() + self.pools[i] = pool def __del__(self): - for pool in self.pools: - pool.close() - pool.join() + for pool in self.pools.values(): + if pool is not None: + pool.close() + pool.join() class InterpolatorND: @@ -115,12 +129,17 @@ def __init__( # Build the Delaunay triangulation on CPU self.tri = Delaunay(x_cpu) - xtree = cKDTree(x_cpu) + self.xtree = cKDTree(x_cpu) if num_cpu_cores is None: num_cpu_cores = cpu_count() self.num_cpu_cores = num_cpu_cores - self.pool = Pool(num_cpu_cores, _initialize, [self.tri, xtree]) + if self.num_cpu_cores >= 1: + self.pool = Pool(num_cpu_cores, _initialize, [self.tri, self.xtree]) + self.own_pool = True + else: + self.pool = None + self.own_pool = False self.own_pool = True self.x = x @@ -142,7 +161,7 @@ def __init__( self.v0 = v0 # Store v0 for barycentric computation def __del__(self): - if self.own_pool: + if self.own_pool and self.pool is not None: self.pool.close() self.pool.join() @@ -163,11 +182,16 @@ def __call__(self, xp: torch.Tensor) -> torch.Tensor: # simplex_idx = self.tri.find_simplex(xp_cpu) # (B,) # Split xp_cpu into batches for parallel processing - batches = np.array_split(xp_cpu, self.num_cpu_cores) - # Use multiprocessing to parallelize find_simplex - results = self.pool.map_async( - 
_find_exact_or_simplex_batch, [batch for batch in batches] - ).get(timeout=10) + if self.pool is not None: + batches = np.array_split(xp_cpu, self.num_cpu_cores) + # Use multiprocessing to parallelize find_simplex + results = self.pool.map_async( + _find_exact_or_simplex_batch, [batch for batch in batches] + ).get(timeout=10) + else: + # Run find_simplex sequentially + results = [_find_exact_or_simplex_batch(xp_cpu, self.tri, self.xtree)] + # Concatenate the results back into a single array # results = [(out_exact_matches0, xp0, simplices0), (out_exact_matches1, xp1, simplices1), ...] exact_matches_idx_list = [] @@ -244,7 +268,7 @@ def simple_function(x, y): y_train = torch.tensor(z_flat, dtype=torch.float64, device="cuda") # Create the interpolator - interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=1) + interpolator = InterpolatorND(x_train, y_train, num_cpu_cores=0) # Generate slightly offset query points n_query = 21 @@ -343,4 +367,4 @@ def interploate_between(): import numpy as np import matplotlib.pyplot as plt - interploate_between() + plot_simple_function() From 24736bea73999936cb3f856074924e431868e7bc Mon Sep 17 00:00:00 2001 From: peibensteine Date: Tue, 4 Feb 2025 10:23:41 +0100 Subject: [PATCH 37/39] refactor GPU usage and update NMF parameters for optimal performance --- stpy/embeddings/optimal_positive_basis.py | 8 ++++---- stpy/embeddings/positive_embedding.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/stpy/embeddings/optimal_positive_basis.py b/stpy/embeddings/optimal_positive_basis.py index ccbf774..2768527 100644 --- a/stpy/embeddings/optimal_positive_basis.py +++ b/stpy/embeddings/optimal_positive_basis.py @@ -291,7 +291,7 @@ def _fit_data(self, data): self.F_data, n_components=self.m, tol=1e-12, - use_gpu=True, + use_gpu=self.F_data.is_cuda, batch_max_iter=2000, fp_precision=self.F_data.dtype, ) @@ -352,15 +352,15 @@ def add_new_functions(self, roi: torch.Tensor, n: int): Phi_new, Theta_new, err = run_nmf( objective, n_components=n, - tol=1e-8, + tol=1e-7, use_gpu=True, - batch_max_iter=600, + batch_max_iter=100, fp_precision=objective.dtype, ) Phi_new = torch.tensor(Phi_new) self.Phi = Phi_new / torch.linalg.norm(Phi_new, dim=0) self.m = self.data_m + n - self.interpolators.add(x, self.Phi, n) + self.interpolators.set(1, x, self.Phi, n) self.precomp = False self.precomp_integral = {} diff --git a/stpy/embeddings/positive_embedding.py b/stpy/embeddings/positive_embedding.py index c523e35..bef991b 100644 --- a/stpy/embeddings/positive_embedding.py +++ b/stpy/embeddings/positive_embedding.py @@ -136,7 +136,7 @@ def cov(self, inverse=False): return self.Gamma_half def embed_internal(self, x): - """Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$.""" + r"""Returns a tensor $T$ where $T_{i,j} = \phi_j(x_i)$.""" if self.d == 1: out = torch.zeros(size=(x.size()[0], self.m), dtype=torch.float64) for j in range(self.m): From d44a23982b6ba0a63c367c71ba58ac9f11e892fe Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 8 Feb 2025 07:56:50 +0100 Subject: [PATCH 38/39] precomp naming fix --- stpy/embeddings/bump_bases.py | 6 +++--- stpy/embeddings/triangle_base.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stpy/embeddings/bump_bases.py b/stpy/embeddings/bump_bases.py index 505d42d..5d86e2c 100644 --- a/stpy/embeddings/bump_bases.py +++ b/stpy/embeddings/bump_bases.py @@ -79,8 +79,8 @@ def integral(self, S): :param S: borel set :return: $\int_S \Phi(x) dx$ """ - if S in self.procomp_integrals.keys(): - return 
self.procomp_integrals[S] + if S in self.precomp_integral.keys(): + return self.precomp_integral[S] else: assert S.d == self.d @@ -121,7 +121,7 @@ def integral(self, S): Gamma_half = self.cov() emb = psi @ Gamma_half - self.procomp_integrals[S] = emb + self.precomp_integral[S] = emb return emb diff --git a/stpy/embeddings/triangle_base.py b/stpy/embeddings/triangle_base.py index 2e43676..4b040ad 100644 --- a/stpy/embeddings/triangle_base.py +++ b/stpy/embeddings/triangle_base.py @@ -60,8 +60,8 @@ def integral(self, S): :param S: borel set :return: $\int_S \Phi(x) dx$ """ - if S in self.procomp_integrals.keys(): - return self.procomp_integrals[S] + if S in self.precomp_integral.keys(): + return self.precomp_integral[S] else: assert S.d == self.d @@ -81,5 +81,5 @@ def integral(self, S): Gamma_half = self.cov() emb = psi @ Gamma_half - self.procomp_integrals[S] = emb + self.precomp_integral[S] = emb return emb From f2b2b53166f8934b6cdec3d20c76c8b3d33e462c Mon Sep 17 00:00:00 2001 From: peibensteine Date: Sat, 8 Feb 2025 08:11:50 +0100 Subject: [PATCH 39/39] add logcox process --- stpy/point_processes/log_cox_process.py | 247 ++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 stpy/point_processes/log_cox_process.py diff --git a/stpy/point_processes/log_cox_process.py b/stpy/point_processes/log_cox_process.py new file mode 100644 index 0000000..03e1780 --- /dev/null +++ b/stpy/point_processes/log_cox_process.py @@ -0,0 +1,247 @@ +from functools import partial +from typing import List +import numpy as np +import scipy +from stpy.borel_set import BorelSet +from stpy.kernels import KernelFunction +from tqdm import tqdm +from autograd_minimize import minimize +import torch + +device = torch.get_default_device() + + +def sqrt(matrix: torch.Tensor) -> torch.Tensor: + return torch.from_numpy( + np.real(scipy.linalg.sqrtm(matrix.cpu().numpy() + 1e-5)) + ).to(device) + + +class LogCoxProcess: + def __init__(self, kernel_object: KernelFunction, integral_discretization: int): + self.kernel_object = kernel_object + self.kernel = kernel_object.kernel + self.integral_discretization = integral_discretization + + def load_data(self, data: List): + # only works with 2d data! 
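+        # data: list of (BorelSet A, (n_i, 2) tensor of event locations x, duration dt) tuples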
+ observations = [] + self.areas = [] + dts = [] + a_xs = [] + a_ys = [] + b_xs = [] + b_ys = [] + + for A, x, dt in data: + observations.append(x) + a_xs.append(A.bounds[0][0]) + b_xs.append(A.bounds[0][1]) + a_ys.append(A.bounds[1][0]) + b_ys.append(A.bounds[1][1]) + dts.append(dt) + self.areas.append((A, dt)) + + self.observations = torch.cat(observations, dim=0) + self.dt = torch.tensor(dts, dtype=torch.float64) + self.a_x = torch.tensor(a_xs) + self.a_y = torch.tensor(a_ys) + self.b_x = torch.tensor(b_xs) + self.b_y = torch.tensor(b_ys) + + def fit(self): + # Get the map by representer theorem + k_func = partial(self.kernel, b=self.observations) + k_int = self.kernel_object.integral(self.a_x, self.a_y, self.b_x, self.b_y) + k_obs = torch.cat( + ( + k_func(a=self.observations), + self.dt.unsqueeze(1) * k_int(self.observations), + ) + ) + + k_weights = [] + k_nodes = [] + k_factors = [] + for A, dt in self.areas: + weights, nodes = A.return_legendre_discretization( + self.integral_discretization + ) + nodes = nodes.to(device) + weights = weights.to(device) + k_n = torch.cat((k_func(a=nodes), self.dt.unsqueeze(1) * k_int(nodes))) + k_weights.append(weights) + k_nodes.append(k_n) + k_factors.append(dt) + + k_int_int = [] + for A, dt in self.areas: + weights, nodes = A.return_legendre_discretization( + self.integral_discretization + ) + nodes = nodes.to(device) + weights = weights.to(device) + integral = dt * torch.sum( + weights * self.dt.unsqueeze(1) * k_int(nodes), dim=1 + ) # sum over nodes + k_int_int.append(integral) + + k_int_int = torch.stack(k_int_int) + k_obs_obs = k_func(a=self.observations) + k_int_obs = self.dt.unsqueeze(1) * k_int( + self.observations + ) # number of observations is columns + k_obs_int = k_int_obs.T + + # Create one big kernel matrix out of the above four matrices + k_top = torch.cat((k_obs_obs, k_obs_int), dim=1) + k_bottom = torch.cat((k_int_obs, k_int_int), dim=1) + k_big = torch.cat((k_top, k_bottom), dim=0) + + # Check if k_big is above zero everywhere + assert torch.all(k_big >= 0), "Kernel matrix should be strictly positive" + + # Check if k_big is approximately symmetric + assert torch.allclose( + k_big, k_big.T, atol=1e-4 + ), "Kernel matrix should be approximately symmetric" + + def objective(alpha): + lkl_term_1 = (alpha @ k_obs).sum() # Should be a single number now + lkl_term_2 = torch.sum( + torch.stack( + [ + dt * torch.sum(w * torch.exp(alpha @ kn)) + for w, kn, dt in zip(k_weights, k_nodes, k_factors) + ] + ) + ) + + regularizer = alpha.T @ k_big @ alpha + return -lkl_term_1 + lkl_term_2 + regularizer * 0.5 + + alpha_0 = torch.zeros([len(self.observations) + len(self.a_x)]) + res = minimize( + objective, + alpha_0.cpu().numpy(), + backend="torch", + method="L-BFGS-B", + precision="float64", + tol=1e-8, + torch_device=str(device), + options={ + "ftol": 1e-08, + "gtol": 1e-08, + "eps": 1e-08, + "maxfun": 15000, + "maxiter": 15000, + "maxls": 20, + }, + ) + print(f"optimum found") + + self.alpha_opt = torch.tensor(res.x) + + def intensity(x: torch.tensor, dt=1): + k_obs = torch.cat((k_func(x), self.dt.unsqueeze(1) * k_int(x))) + return dt * torch.exp(torch.tensor(res.x) @ k_obs).unsqueeze(1) + + self.rate_value = intensity + + return intensity + + def get_gamma_MAP(self, n, x, a, dt, lr=0.01, max_it=10000, eps=1e-6): + mean = 0 + cov_Y = self.kernel(x, x) + Q = sqrt(cov_Y) + self.Q = Q + + def f(arg): + y = arg @ Q + mean + return (-0.5) * arg.pow(2).sum() + (y * n - torch.exp(y) * a * dt).sum() + + gamma = torch.zeros(len(x), 
dtype=torch.float64, requires_grad=True) + optimizer = torch.optim.SGD([gamma], lr=lr) + + # Use tqdm to show progress + prev_loss = float("inf") + for _ in tqdm(range(max_it), desc="Optimizing gamma"): + optimizer.zero_grad() + loss = -f(gamma) # we minimize -f because we want to maximize f + # if loss.item() > prev_loss: + # print("Warning: Loss did not decrease") + prev_loss = loss.item() + loss.backward() + # If gradient is smaller than eps, return + if torch.norm(gamma.grad) < eps: + print("Solved to eps") + break + optimizer.step() + + assert f(gamma) > f( + torch.distributions.MultivariateNormal( + loc=gamma, covariance_matrix=torch.eye(len(gamma)) * 50 + ).sample() + ) + + return gamma.detach() + + def sample_mala(self, n, x, a, dt, h, num_steps, burn_in_steps, initial_gamma=None): + # param n is 1d tensor with the counts of points in the cells + # param x is the discretization of the area we're interested in + # param a is either a 2d tensor with the areas of the discretization + # or a float that gives all areas + # step size h + gamma = self.get_MAP() if initial_gamma is None else initial_gamma + mean = 0 # prior mean I think? + cov_Y = self.kernel(x, x) + Q = sqrt(cov_Y) + self.Q = Q + accept_prob_sum = 0 + + # The log posterior over gamma given the data + def log_f(arg): + y = arg @ Q + mean + return (-0.5) * arg.pow(2).sum() + (y * n - torch.exp(y) * a * dt).sum() + + base_line = log_f(gamma) + + def f(arg): + return log_f(arg) # - 2 * base_line + + # Gradient of the energy + def grad(arg): + y = arg @ Q + mean + return -arg + (n - torch.exp(y) * a * dt) @ Q.T + + # mean of the proposal distribution, named \xi in paper + def r_mean_given_arg(arg): + return arg + (h / 2.0) * grad(arg) + + for i in range(num_steps): + # Proposal + proposal = torch.distributions.MultivariateNormal( + loc=r_mean_given_arg(gamma), + covariance_matrix=h * torch.eye(len(gamma), dtype=torch.float64), + ).sample() + + accept_prob = torch.exp( + f(proposal) + - (gamma - r_mean_given_arg(proposal)).pow(2).sum() / (2 * h) + ) / ( + torch.exp( + f(gamma) + - (proposal - r_mean_given_arg(gamma)).pow(2).sum() / (2 * h) + ) + ) + + if np.random.rand() < accept_prob: + gamma = proposal + + accept_prob_sum += min(accept_prob.item(), 1.0) + + if i > burn_in_steps: + yield torch.exp(gamma @ Q + mean) + + mean_accept_prob = accept_prob_sum / num_steps + print(mean_accept_prob)
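
A minimal usage sketch for the LogCoxProcess class added in PATCH 39/39, assuming the KernelFunction and BorelSet constructors used elsewhere in this series; the event locations, kernel lengthscale, and discretization sizes below are illustrative placeholders rather than values taken from the patches.

    import torch

    from stpy.borel_set import BorelSet
    from stpy.kernels import KernelFunction
    from stpy.point_processes.log_cox_process import LogCoxProcess

    d = 2
    A = BorelSet(d, torch.tensor([[-1, 1], [-1, 1]]).double())

    # Hypothetical observations: 50 events inside A over a window of length dt = 1.0.
    x = torch.rand(50, d).double() * 2 - 1
    data = [(A, x, 1.0)]  # load_data expects a list of (region, events, duration) triples

    process = LogCoxProcess(KernelFunction(gamma=0.5, d=d), integral_discretization=50)
    process.load_data(data)
    intensity = process.fit()  # returns a callable rate function

    xtest = A.return_discretization(30)
    print(intensity(xtest, dt=1.0).shape)  # one intensity value per test point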