From 03b9a34bc0acc00843dc7b175b4782a8f4bdbd09 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 27 Jan 2022 18:25:52 +0100 Subject: [PATCH 1/4] Remove TF1 and TF2 Tests --- batchglm/api/models/__init__.py | 11 +- batchglm/api/models/tf1/__init__.py | 3 - batchglm/api/models/tf1/glm_beta.py | 2 - batchglm/api/models/tf1/glm_nb.py | 2 - batchglm/api/models/tf1/glm_norm.py | 2 - batchglm/train/tf1/README.md | 92 -- batchglm/train/tf1/__init__.py | 0 batchglm/train/tf1/base/__init__.py | 3 - batchglm/train/tf1/base/estimator.py | 342 ---- batchglm/train/tf1/base/estimator_graph.py | 15 - batchglm/train/tf1/base/external.py | 2 - batchglm/train/tf1/base/model.py | 39 - batchglm/train/tf1/base_glm/README.md | 2 - batchglm/train/tf1/base_glm/__init__.py | 6 - .../train/tf1/base_glm/estimator_graph.py | 1394 ----------------- batchglm/train/tf1/base_glm/external.py | 3 - batchglm/train/tf1/base_glm/fim.py | 67 - batchglm/train/tf1/base_glm/hessians.py | 100 -- batchglm/train/tf1/base_glm/jacobians.py | 72 - batchglm/train/tf1/base_glm/model.py | 166 -- .../train/tf1/base_glm/reducible_tensors.py | 351 ----- batchglm/train/tf1/base_glm_all/README.md | 2 - batchglm/train/tf1/base_glm_all/__init__.py | 6 - batchglm/train/tf1/base_glm_all/estimator.py | 362 ----- .../train/tf1/base_glm_all/estimator_graph.py | 543 ------- batchglm/train/tf1/base_glm_all/external.py | 12 - .../train/tf1/base_glm_all/external_beta.py | 6 - .../train/tf1/base_glm_all/external_nb.py | 6 - .../train/tf1/base_glm_all/external_norm.py | 6 - batchglm/train/tf1/base_glm_all/fim.py | 115 -- batchglm/train/tf1/base_glm_all/hessians.py | 193 --- batchglm/train/tf1/base_glm_all/jacobians.py | 103 -- .../tf1/base_glm_all/reducible_tensors.py | 99 -- batchglm/train/tf1/external.py | 1 - batchglm/train/tf1/glm_beta/__init__.py | 7 - batchglm/train/tf1/glm_beta/estimator.py | 291 ---- .../train/tf1/glm_beta/estimator_graph.py | 12 - batchglm/train/tf1/glm_beta/external.py | 18 - batchglm/train/tf1/glm_beta/fim.py | 25 - batchglm/train/tf1/glm_beta/hessians.py | 92 -- batchglm/train/tf1/glm_beta/jacobians.py | 40 - batchglm/train/tf1/glm_beta/model.py | 133 -- .../train/tf1/glm_beta/reducible_tensors.py | 13 - .../train/tf1/glm_beta/training_strategies.py | 37 - batchglm/train/tf1/glm_nb/__init__.py | 7 - batchglm/train/tf1/glm_nb/estimator.py | 152 -- batchglm/train/tf1/glm_nb/estimator_graph.py | 12 - batchglm/train/tf1/glm_nb/external.py | 17 - batchglm/train/tf1/glm_nb/fim.py | 43 - batchglm/train/tf1/glm_nb/hessians.py | 93 -- batchglm/train/tf1/glm_nb/jacobians.py | 66 - batchglm/train/tf1/glm_nb/model.py | 136 -- .../train/tf1/glm_nb/reducible_tensors.py | 13 - .../train/tf1/glm_nb/training_strategies.py | 27 - batchglm/train/tf1/glm_norm/__init__.py | 7 - batchglm/train/tf1/glm_norm/estimator.py | 325 ---- .../train/tf1/glm_norm/estimator_graph.py | 12 - batchglm/train/tf1/glm_norm/external.py | 18 - batchglm/train/tf1/glm_norm/fim.py | 28 - batchglm/train/tf1/glm_norm/hessians.py | 66 - batchglm/train/tf1/glm_norm/jacobians.py | 41 - batchglm/train/tf1/glm_norm/model.py | 138 -- .../train/tf1/glm_norm/reducible_tensors.py | 13 - .../train/tf1/glm_norm/training_strategies.py | 27 - batchglm/train/tf1/ops.py | 59 - batchglm/train/tf1/train.py | 315 ---- .../unit_test/test_acc_analytic_glm_all.py | 373 ----- .../test_acc_constrained_vglm_all.py | 140 -- batchglm/unit_test/test_acc_glm_all.py | 528 ------- batchglm/unit_test/test_acc_glm_all_tf2.py | 524 ------- .../unit_test/test_acc_sizefactors_glm_all.py | 103 -- 
batchglm/unit_test/test_hessians_glm_all.py | 187 --- batchglm/unit_test/test_jacobians_glm_all.py | 192 --- .../unit_test/test_jacobians_glm_all_tf2.py | 186 --- batchglm/unit_test/test_simulators_glm_all.py | 128 -- 75 files changed, 1 insertion(+), 8771 deletions(-) delete mode 100644 batchglm/api/models/tf1/__init__.py delete mode 100644 batchglm/api/models/tf1/glm_beta.py delete mode 100644 batchglm/api/models/tf1/glm_nb.py delete mode 100644 batchglm/api/models/tf1/glm_norm.py delete mode 100644 batchglm/train/tf1/README.md delete mode 100644 batchglm/train/tf1/__init__.py delete mode 100644 batchglm/train/tf1/base/__init__.py delete mode 100644 batchglm/train/tf1/base/estimator.py delete mode 100644 batchglm/train/tf1/base/estimator_graph.py delete mode 100644 batchglm/train/tf1/base/external.py delete mode 100644 batchglm/train/tf1/base/model.py delete mode 100644 batchglm/train/tf1/base_glm/README.md delete mode 100644 batchglm/train/tf1/base_glm/__init__.py delete mode 100644 batchglm/train/tf1/base_glm/estimator_graph.py delete mode 100644 batchglm/train/tf1/base_glm/external.py delete mode 100644 batchglm/train/tf1/base_glm/fim.py delete mode 100644 batchglm/train/tf1/base_glm/hessians.py delete mode 100644 batchglm/train/tf1/base_glm/jacobians.py delete mode 100644 batchglm/train/tf1/base_glm/model.py delete mode 100644 batchglm/train/tf1/base_glm/reducible_tensors.py delete mode 100644 batchglm/train/tf1/base_glm_all/README.md delete mode 100644 batchglm/train/tf1/base_glm_all/__init__.py delete mode 100644 batchglm/train/tf1/base_glm_all/estimator.py delete mode 100644 batchglm/train/tf1/base_glm_all/estimator_graph.py delete mode 100644 batchglm/train/tf1/base_glm_all/external.py delete mode 100644 batchglm/train/tf1/base_glm_all/external_beta.py delete mode 100644 batchglm/train/tf1/base_glm_all/external_nb.py delete mode 100644 batchglm/train/tf1/base_glm_all/external_norm.py delete mode 100644 batchglm/train/tf1/base_glm_all/fim.py delete mode 100644 batchglm/train/tf1/base_glm_all/hessians.py delete mode 100644 batchglm/train/tf1/base_glm_all/jacobians.py delete mode 100644 batchglm/train/tf1/base_glm_all/reducible_tensors.py delete mode 100644 batchglm/train/tf1/external.py delete mode 100644 batchglm/train/tf1/glm_beta/__init__.py delete mode 100644 batchglm/train/tf1/glm_beta/estimator.py delete mode 100644 batchglm/train/tf1/glm_beta/estimator_graph.py delete mode 100644 batchglm/train/tf1/glm_beta/external.py delete mode 100644 batchglm/train/tf1/glm_beta/fim.py delete mode 100644 batchglm/train/tf1/glm_beta/hessians.py delete mode 100644 batchglm/train/tf1/glm_beta/jacobians.py delete mode 100644 batchglm/train/tf1/glm_beta/model.py delete mode 100644 batchglm/train/tf1/glm_beta/reducible_tensors.py delete mode 100644 batchglm/train/tf1/glm_beta/training_strategies.py delete mode 100644 batchglm/train/tf1/glm_nb/__init__.py delete mode 100644 batchglm/train/tf1/glm_nb/estimator.py delete mode 100644 batchglm/train/tf1/glm_nb/estimator_graph.py delete mode 100644 batchglm/train/tf1/glm_nb/external.py delete mode 100644 batchglm/train/tf1/glm_nb/fim.py delete mode 100644 batchglm/train/tf1/glm_nb/hessians.py delete mode 100644 batchglm/train/tf1/glm_nb/jacobians.py delete mode 100644 batchglm/train/tf1/glm_nb/model.py delete mode 100644 batchglm/train/tf1/glm_nb/reducible_tensors.py delete mode 100644 batchglm/train/tf1/glm_nb/training_strategies.py delete mode 100644 batchglm/train/tf1/glm_norm/__init__.py delete mode 100644 batchglm/train/tf1/glm_norm/estimator.py 
delete mode 100644 batchglm/train/tf1/glm_norm/estimator_graph.py delete mode 100644 batchglm/train/tf1/glm_norm/external.py delete mode 100644 batchglm/train/tf1/glm_norm/fim.py delete mode 100644 batchglm/train/tf1/glm_norm/hessians.py delete mode 100644 batchglm/train/tf1/glm_norm/jacobians.py delete mode 100644 batchglm/train/tf1/glm_norm/model.py delete mode 100644 batchglm/train/tf1/glm_norm/reducible_tensors.py delete mode 100644 batchglm/train/tf1/glm_norm/training_strategies.py delete mode 100644 batchglm/train/tf1/ops.py delete mode 100644 batchglm/train/tf1/train.py delete mode 100644 batchglm/unit_test/test_acc_analytic_glm_all.py delete mode 100644 batchglm/unit_test/test_acc_constrained_vglm_all.py delete mode 100644 batchglm/unit_test/test_acc_glm_all.py delete mode 100644 batchglm/unit_test/test_acc_glm_all_tf2.py delete mode 100644 batchglm/unit_test/test_acc_sizefactors_glm_all.py delete mode 100644 batchglm/unit_test/test_hessians_glm_all.py delete mode 100644 batchglm/unit_test/test_jacobians_glm_all.py delete mode 100644 batchglm/unit_test/test_jacobians_glm_all_tf2.py delete mode 100644 batchglm/unit_test/test_simulators_glm_all.py diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index b6c68fb0..eff3c3f2 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,14 +1,5 @@ from . import numpy try: - import tensorflow as tf - if tf.__version__.split(".")[0] == "1": - from . import tf1 - else: - tf1 = None - if tf.__version__.split(".")[0] == "2": - from . import tf2 - else: - tf2 = None + from . import tf2 except ImportError: - tf1 = None tf2 = None diff --git a/batchglm/api/models/tf1/__init__.py b/batchglm/api/models/tf1/__init__.py deleted file mode 100644 index 8fbdb228..00000000 --- a/batchglm/api/models/tf1/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import glm_beta -from . import glm_nb -from . import glm_norm diff --git a/batchglm/api/models/tf1/glm_beta.py b/batchglm/api/models/tf1/glm_beta.py deleted file mode 100644 index ce7e336c..00000000 --- a/batchglm/api/models/tf1/glm_beta.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.glm_beta import InputDataGLM, Model, Simulator -from batchglm.train.tf1.glm_beta import Estimator \ No newline at end of file diff --git a/batchglm/api/models/tf1/glm_nb.py b/batchglm/api/models/tf1/glm_nb.py deleted file mode 100644 index fc0f72ab..00000000 --- a/batchglm/api/models/tf1/glm_nb.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.glm_nb import InputDataGLM, Model, Simulator -from batchglm.train.tf1.glm_nb import Estimator \ No newline at end of file diff --git a/batchglm/api/models/tf1/glm_norm.py b/batchglm/api/models/tf1/glm_norm.py deleted file mode 100644 index 7dc1ce0f..00000000 --- a/batchglm/api/models/tf1/glm_norm.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.glm_norm import InputDataGLM, Model, Simulator -from batchglm.train.tf1.glm_norm import Estimator \ No newline at end of file diff --git a/batchglm/train/tf1/README.md b/batchglm/train/tf1/README.md deleted file mode 100644 index 3bf0d9f0..00000000 --- a/batchglm/train/tf1/README.md +++ /dev/null @@ -1,92 +0,0 @@ -Implementation of models using Tensorflow -==== -This module contains all model estimators depending on Tensorflow. 
- - -Template to implement a new model estimator: ----- -First, set up a parameter definition defining all model parameters together with the corresponding dimensions: -```python -PARAMS = { - "param_1": ("samples", "variables"), - "param_2": ("variables",), - ... -} -``` -All equally-named dimensions have to be of the same size. - -Create a Tensorflow model with all necessary parameters: -```python -from impl.tf.base import TFEstimatorGraph - -class EstimatorGraph(TFEstimatorGraph): - def __init__(self, graph): - TFEstimatorGraph.__init__(self, graph) - # required by TFEstimatorGraph - self.global_step = tf.train.get_or_create_global_step() - self.init_op = ... - self.loss = ... - self.train_op = ... - # parameters: - self.param_1 = ... - self.param_2 = ... - -``` -Now create the actual Estimator for the given model: -```python -from models. import AbstractEstimator -from impl.tf.base import MonitoredTFEstimator - -class SomeEstimator(AbstractEstimator, MonitoredTFEstimator, metaclass=abc.ABCMeta): - model: EstimatorGraph - - # Set up a PARAMS property returning the previously created parameter definition: - # This property is used among other things for exporting data to NetCDF-format. - @property - def PARAMS(cls) -> dict: - return PARAMS - - def __init__(self, input_data, model=None): - if model is None: - tf.reset_default_graph() - # create model - model = EstimatorGraph(graph=tf.get_default_graph()) - - MonitoredTFEstimator.__init__(self, input_data, model) - - # The scaffold provides some information about the model graph to the training session. - # It is possible to add additional capabilities like a summary_op which writes summaries for TensorBoard - tf1 - def _scaffold(self): - with self.model.graph.as_default(): - scaffold = tf.train.Scaffold( - init_op=self.model.init_op, - summary_op=self.model.merged_summary, - saver=self.model.saver, - ) - return scaffold - - # Overwrite this method if you would like to feed additional data during the training - def train(self, *args, learning_rate=0.05, **kwargs): - tf.logging.info("learning rate: %s" % learning_rate) - super().train(feed_dict={"learning_rate:0": learning_rate}) - - # Now define all parameters requested by this model - # (defined in model..AbstractEstimator) - @property - def param_1(self): - return self.get("param_1") # equal to self.run(self.model.param_1) - @property - def param_2(self): - return self.get("param_2") # equal to self.run(self.model.param_2) - -``` - -Some additional notes: -- estimator.get("param_1") == estimator.session.run(estimator.model.param_1) -- estimator.to_xarray(param_list) needs the PARAMS definition to export the estimated parameters as - xarray.Dataset() -- All necessary parameters should be directly exposed as parameter tensors in EstimatorGraph - (e.g. EstimatorGraph().param_1) with correct shapes as defined in PARAMS. - However, this property is currently not validated automatically. 
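For illustration, a minimal usage sketch of an estimator built from this template, assembled from the calls the README and the deleted `_TFEstimator` code describe (`SomeEstimator`, `input_data`, `param_1`, and `param_2` are hypothetical names, not part of the batchglm API):

```python
# Hypothetical flow, assuming SomeEstimator was implemented as outlined above.
estimator = SomeEstimator(input_data)
estimator.initialize()                # opens a tf.compat.v1.Session and runs the scaffold's init_op
estimator.train(learning_rate=0.05)   # feeds "learning_rate:0" via feed_dict, as in train() above
param_1 = estimator.get("param_1")    # equivalent to estimator.session.run(estimator.model.param_1)
ds = estimator.to_xarray(["param_1", "param_2"])  # export uses the PARAMS definition
```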
- diff --git a/batchglm/train/tf1/__init__.py b/batchglm/train/tf1/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/batchglm/train/tf1/base/__init__.py b/batchglm/train/tf1/base/__init__.py deleted file mode 100644 index 67d248f6..00000000 --- a/batchglm/train/tf1/base/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .estimator import _TFEstimator -from .estimator_graph import TFEstimatorGraph -from .model import ProcessModelBase \ No newline at end of file diff --git a/batchglm/train/tf1/base/estimator.py b/batchglm/train/tf1/base/estimator.py deleted file mode 100644 index 5f5b6359..00000000 --- a/batchglm/train/tf1/base/estimator.py +++ /dev/null @@ -1,342 +0,0 @@ -import abc -from enum import Enum -import logging -import numpy as np -import pprint -import tensorflow as tf -import time -from typing import Dict, Any, Union, Iterable - -from .external import _EstimatorBase, pkg_constants - -logger = logging.getLogger("batchglm") - - -class TFEstimatorGraph(metaclass=abc.ABCMeta): - graph: tf.Graph - loss: tf.Tensor - init_op: tf.Tensor - train_op: tf.Tensor - global_step: tf.Tensor - - def __init__(self, graph=None): - if graph is None: - graph = tf.Graph() - self.graph = graph - - -class _TFEstimator(metaclass=abc.ABCMeta): - - session: tf.compat.v1.Session - feed_dict: Dict[Union[Union[tf.Tensor, tf.Operation], Any], Any] - _param_decorators: Dict[str, callable] - - def __init__( - self - ): - self.session = None - self.feed_dict = {} - self._param_decorators = dict() - - def initialize(self): - self.close_session() - self.feed_dict = {} - with self.model.graph.as_default(): - # set up session parameters - self.session = tf.compat.v1.Session(config=pkg_constants.TF_CONFIG_PROTO) - self.session.run(self._scaffold().init_op, feed_dict=self.feed_dict) - - def close_session(self): - if self.session is None: - return False - try: - self.session.close() - return True - except (tf.errors.OpError, RuntimeError): - return False - - def run(self, tensor, feed_dict=None): - if feed_dict is None: - feed_dict = self.feed_dict - - return self.session.run(tensor, feed_dict=feed_dict) - - @abc.abstractmethod - def _scaffold(self) -> tf.compat.v1.train.Scaffold: - """ - Should create a training scaffold for this Estimator's model - - :return: tf1.compat.v1.train.Scaffold object - """ - pass - - def _get_unsafe(self, key: Union[str, Iterable]) -> Union[Any, Dict[str, Any]]: - if isinstance(key, str): - return self.run(self.model.__getattribute__(key)) - elif isinstance(key, Iterable): - d = {s: self.model.__getattribute__(s) for s in key} - return self.run(d) - - def get(self, key: Union[str, Iterable]) -> Union[Any, Dict[str, Any]]: - """ - Returns the values of the tensor(s) specified by key. - - :param key: Either a string or an iterable list/set/tuple/etc. 
of strings - :return: Single array if `key` is a string or a dict {k: value} of arrays if `key` is a collection of strings - """ - if isinstance(key, str): - if key not in self.param_shapes(): - raise ValueError("Unknown parameter %s" % key) - elif isinstance(key, Iterable): - for k in list(key): - if k not in self.param_shapes(): - raise ValueError("Unknown parameter %s" % k) - return self._get_unsafe(key) - - @property - def global_step(self): - return self._get_unsafe("global_step") - - @property - def loss(self): - return self._get_unsafe("loss") - - def _train( - self, - *args, - learning_rate=None, - feed_dict=None, - convergence_criteria="all_converged", - stopping_criteria=None, - train_op=None, - trustregion_mode=False, - require_hessian=False, - require_fim=False, - is_batched=False, - **kwargs - ): - """ - Starts training of the model - - :param feed_dict: dict of values which will be feeded each `session.run()` - - See also feed_dict parameter of `session.run()`. - :param convergence_criteria: criteria after which the training will be interrupted. - - Currently implemented criterias: - - - "step": - stop, when the step counter reaches `stopping_criteria` - :param stopping_criteria: Additional parameter for convergence criteria. - - See parameter `convergence_criteria` for exact meaning - :param loss_window_size: specifies `N` in `convergence_criteria`. - :param train_op: uses this training operation if specified - """ - # Set default values: - if stopping_criteria is None: - if convergence_criteria == "step": - stopping_criteria = 100 - - if train_op is None: - train_op = self.model.train_op - - # Initialize: - if pkg_constants.EVAL_ON_BATCHED and is_batched: - _, _ = self.session.run( - (self.model.batched_data_model.eval_set, - self.model.model_vars.convergence_update), - feed_dict={self.model.model_vars.convergence_status: - np.repeat(False, repeats=self.model.model_vars.converged.shape[0]) - } - ) - ll_current = self.session.run(self.model.batched_data_model.norm_neg_log_likelihood) - else: - # Have to use eval1 here so that correct object is pulled in trust region. - _, _ = self.session.run( - (self.model.full_data_model.eval1_set, - self.model.model_vars.convergence_update), - feed_dict={self.model.model_vars.convergence_status: - np.repeat(False, repeats=self.model.model_vars.converged.shape[0]) - } - ) - ll_current = self.session.run(self.model.full_data_model.norm_neg_log_likelihood_eval1) - - logging.getLogger("batchglm").info( - "Step: 0 loss: %f models converged 0", - np.sum(ll_current) - ) - - # Set all to convergence status to False, this is need if multiple training strategies are run: - converged_current = np.repeat(False, repeats=self.model.model_vars.converged.shape[0]) - train_step = 0 - - def convergence_decision(convergence_status, step_counter): - if convergence_criteria == "step": - return np.any(np.logical_not(convergence_status)) and step_counter < stopping_criteria - elif convergence_criteria == "all_converged": - return np.any(np.logical_not(convergence_status)) - else: - raise ValueError("convergence_criteria %s not recognized." % convergence_criteria) - - while convergence_decision(converged_current, train_step): - t0 = time.time() - converged_prev = converged_current.copy() - ll_prev = ll_current.copy() - - ## Run update. 
- t_a = time.time() - if is_batched: - _ = self.session.run(self.model.batched_data_model.train_set) - else: - _ = self.session.run(self.model.full_data_model.train_set) - - if trustregion_mode: - t_b = time.time() - _, x_step = self.session.run( - (train_op["train"]["trial_op"], - train_op["update"]), - feed_dict=feed_dict - ) - t_c = time.time() - _ = self.session.run(self.model.full_data_model.eval0_set) - t_d = time.time() - train_step, _, features_updated = self.session.run( - (self.model.global_step, - train_op["train"]["update_op"], - self.model.model_vars.updated), - feed_dict=feed_dict - ) - t_e = time.time() - else: - t_b = time.time() - train_step, _, x_step, features_updated = self.session.run( - (self.model.global_step, - train_op["train"], - train_op["update"], - self.model.model_vars.updated), - feed_dict=feed_dict - ) - t_c = time.time() - - if pkg_constants.EVAL_ON_BATCHED and is_batched: - _ = self.session.run(self.model.batched_data_model.eval_set) - ll_current, jac_train = self.session.run( - (self.model.batched_data_model.norm_neg_log_likelihood, - self.model.batched_data_model.neg_jac_train_eval) - ) - else: - _ = self.session.run(self.model.full_data_model.eval1_set) - ll_current, jac_train = self.session.run( - (self.model.full_data_model.norm_neg_log_likelihood_eval1, - self.model.full_data_model.neg_jac_train_eval) - ) - t_f = time.time() - - if trustregion_mode: - logging.getLogger("batchglm").debug( - "### run time break-down: reduce op. %s, trial %s, ll %s, update %s, eval %s", - str(np.round(t_b - t_a, 3)), - str(np.round(t_c - t_b, 3)), - str(np.round(t_d - t_c, 3)), - str(np.round(t_e - t_d, 3)), - str(np.round(t_f - t_e, 3)) - ) - else: - logging.getLogger("batchglm").debug( - "### run time break-down: reduce op. %s, update %s, eval %s", - str(np.round(t_b - t_a, 3)), - str(np.round(t_c - t_b, 3)), - str(np.round(t_f - t_c, 3)) - ) - - if len(self.model.full_data_model.idx_train_loc) > 0: - x_norm_loc = np.sqrt(np.sum(np.square( - np.abs(x_step[self.model.model_vars.idx_train_loc, :]) - ), axis=0)) - else: - x_norm_loc = np.zeros([self.model.model_vars.n_features]) - - if len(self.model.full_data_model.idx_train_scale) > 0: - x_norm_scale = np.sqrt(np.sum(np.square( - np.abs(x_step[self.model.model_vars.idx_train_scale, :]) - ), axis=0)) - else: - x_norm_scale = np.zeros([self.model.model_vars.n_features]) - - # Update convergence status of non-converged features: - # Cost function value improvement: - ll_converged = (ll_prev - ll_current) / ll_prev < pkg_constants.LLTOL_BY_FEATURE - if not pkg_constants.EVAL_ON_BATCHED or not is_batched: - if np.any(ll_current > ll_prev + 1e-12): - logging.getLogger("batchglm").warning("bad update found: %i bad updates" % np.sum(ll_current > ll_prev + 1e-12)) - - converged_current = np.logical_or( - converged_prev, - np.logical_and(ll_converged, features_updated) - ) - converged_f = np.logical_and( - np.logical_not(converged_prev), - np.logical_and(ll_converged, features_updated) - ) - # Gradient norm: - if pkg_constants.EVAL_ON_BATCHED and is_batched: - jac_normalization = self.model.batch_size - else: - jac_normalization = self.model.num_observations - - if len(self.model.full_data_model.idx_train_loc) > 0: - idx_jac_loc = np.array([list(self.model.full_data_model.idx_train).index(x) - for x in self.model.full_data_model.idx_train_loc]) - grad_norm_loc = np.sum(np.abs(jac_train[:, idx_jac_loc]), axis=1) / jac_normalization - else: - grad_norm_loc = np.zeros([self.model.model_vars.n_features]) - if 
len(self.model.full_data_model.idx_train_scale) > 0: - idx_jac_scale = np.array([list(self.model.full_data_model.idx_train).index(x) - for x in self.model.full_data_model.idx_train_scale]) - grad_norm_scale = np.sum(np.abs(jac_train[:, idx_jac_scale]), axis=1) / jac_normalization - else: - grad_norm_scale = np.zeros([self.model.model_vars.n_features]) - converged_g = np.logical_and( - np.logical_not(converged_prev), - np.logical_and( - grad_norm_loc < pkg_constants.GTOL_BY_FEATURE_LOC, - grad_norm_scale < pkg_constants.GTOL_BY_FEATURE_SCALE - ) - ) - converged_current = np.logical_or( - converged_current, - np.logical_and( - grad_norm_loc < pkg_constants.GTOL_BY_FEATURE_LOC, - grad_norm_scale < pkg_constants.GTOL_BY_FEATURE_SCALE - ) - ) - # Step length: - converged_x = np.logical_and( - np.logical_not(converged_prev), - np.logical_and( - x_norm_loc < pkg_constants.XTOL_BY_FEATURE_LOC, - x_norm_scale < pkg_constants.XTOL_BY_FEATURE_SCALE - ) - ) - converged_current = np.logical_or( - converged_current, - np.logical_and( - x_norm_loc < pkg_constants.XTOL_BY_FEATURE_LOC, - x_norm_scale < pkg_constants.XTOL_BY_FEATURE_SCALE - ) - ) - t1 = time.time() - - self.session.run((self.model.model_vars.convergence_update), feed_dict={ - self.model.model_vars.convergence_status: converged_current - }) - logging.getLogger("batchglm").info( - "Step: %d loss: %f, converged %i in %s sec., updated %i, {f: %i, g: %i, x: %i}", - train_step, - np.sum(ll_current), - np.sum(converged_current).astype("int32"), - str(np.round(t1 - t0, 3)), - np.sum(np.logical_and(np.logical_not(converged_prev), features_updated)).astype("int32"), - np.sum(converged_f), np.sum(converged_g), np.sum(converged_x) - ) diff --git a/batchglm/train/tf1/base/estimator_graph.py b/batchglm/train/tf1/base/estimator_graph.py deleted file mode 100644 index 2b420809..00000000 --- a/batchglm/train/tf1/base/estimator_graph.py +++ /dev/null @@ -1,15 +0,0 @@ -import abc -import tensorflow as tf - - -class TFEstimatorGraph(metaclass=abc.ABCMeta): - graph: tf.Graph - loss: tf.Tensor - init_op: tf.Tensor - train_op: tf.Tensor - global_step: tf.Tensor - - def __init__(self, graph=None): - if graph is None: - graph = tf.Graph() - self.graph = graph diff --git a/batchglm/train/tf1/base/external.py b/batchglm/train/tf1/base/external.py deleted file mode 100644 index 5dd321e1..00000000 --- a/batchglm/train/tf1/base/external.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.base import _EstimatorBase -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/base/model.py b/batchglm/train/tf1/base/model.py deleted file mode 100644 index ebfec6e0..00000000 --- a/batchglm/train/tf1/base/model.py +++ /dev/null @@ -1,39 +0,0 @@ -import abc -import logging - -import tensorflow as tf -import numpy as np - -logger = logging.getLogger(__name__) - - -class ProcessModelBase: - - @abc.abstractmethod - def param_bounds(self, dtype): - pass - - def tf_clip_param( - self, - param, - name - ): - bounds_min, bounds_max = self.param_bounds(param.dtype) - return tf.clip_by_value( - param, - bounds_min[name], - bounds_max[name] - ) - - def np_clip_param( - self, - param, - name - ): - bounds_min, bounds_max = self.param_bounds(param.dtype) - return np.clip( - param, - bounds_min[name], - bounds_max[name], - # out=param - ) diff --git a/batchglm/train/tf1/base_glm/README.md b/batchglm/train/tf1/base_glm/README.md deleted file mode 100644 index eea79ccc..00000000 --- a/batchglm/train/tf1/base_glm/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Classes with GLM specific 
code. -All noise models that are in the GLM category inherit all of these classes. \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm/__init__.py b/batchglm/train/tf1/base_glm/__init__.py deleted file mode 100644 index c77b285b..00000000 --- a/batchglm/train/tf1/base_glm/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .estimator_graph import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BatchedDataModelGraphGLM -from .hessians import HessiansGLM -from .fim import FIMGLM -from .jacobians import JacobiansGLM -from .model import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM -from .reducible_tensors import ReducableTensorsGLM diff --git a/batchglm/train/tf1/base_glm/estimator_graph.py b/batchglm/train/tf1/base_glm/estimator_graph.py deleted file mode 100644 index 543c6468..00000000 --- a/batchglm/train/tf1/base_glm/estimator_graph.py +++ /dev/null @@ -1,1394 +0,0 @@ -import abc -import logging -from typing import Union - -import numpy as np -import tensorflow as tf - -try: - import anndata -except ImportError: - anndata = None - -from .model import ModelVarsGLM -from .fim import FIMGLM -from .hessians import HessiansGLM -from .jacobians import JacobiansGLM -from .external import TFEstimatorGraph -from .external import train_utils -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class FullDataModelGraphGLM: - """ - Computational graph to evaluate model on full data set. - - Here, we assume that the model cannot be executed on the full data set - for memory reasons and therefore divide the data set into batches, - execute the model on these batches and summarise the resulting metrics - across batches. FullDataModelGraph is therefore an extension of - BasicModelGraph that distributes operations across batches of observations. - - The distribution is performed by the function map_model(). - The model metrics which can be collected are: - - - The model likelihood (cost function value). - - Model Jacobian matrix for trainer parameters (for training). - - Model Jacobian matrix for all parameters (for downstream usage, - e.g. hypothesis tests which can also be performed on closed form MLEs). - - Model Hessian matrix for trainer parameters (for training). - - Model Hessian matrix for all parameters (for downstream usage, - e.g. hypothesis tests which can also be performed on closed form MLEs). - """ - log_likelihood: tf.Tensor - norm_log_likelihood: tf.Tensor - norm_neg_log_likelihood: tf.Tensor - loss: tf.Tensor - - jac: JacobiansGLM - neg_jac_train: tf.Tensor - - hessians: HessiansGLM - neg_hessians_train: tf.Tensor - - fim: FIMGLM - fim_train: tf.Tensor - - noise_model: str - - -class BatchedDataModelGraphGLM: - """ - Computational graph to evaluate model on batches of data set. - - The model metrics of a batch which can be collected are: - - - The model likelihood (cost function value). - - Model Jacobian matrix for trained parameters (for training). - - Model Hessian matrix for trained parameters (for training). - - Model Fisher information matrix for trained parameters (for training). - """ - log_likelihood: tf.Tensor - norm_log_likelihood: tf.Tensor - norm_neg_log_likelihood: tf.Tensor - loss: tf.Tensor - - neg_jac_train: tf.Tensor - neg_hessians_train: tf.Tensor - fim_train: tf.Tensor - - noise_model: str - - -class GradientGraphGLM: - """ - - Define newton-rhapson updates and gradients depending on whether data is batched. 
- The has to be distinguished as there are different jacobians - and hessians for the full and the batched data. - """ - model_vars: ModelVarsGLM - full_data_model: FullDataModelGraphGLM - batched_data_model: BatchedDataModelGraphGLM - - def __init__( - self, - model_vars: ModelVarsGLM, - full_data_model: FullDataModelGraphGLM, - batched_data_model: BatchedDataModelGraphGLM, - train_loc, - train_scale - ): - self.gradients_full_raw = None - self.gradients_batch_raw = None - self.model_vars = model_vars - self.full_data_model = full_data_model - self.batched_data_model = batched_data_model - - if train_loc or train_scale: - self.gradients_full() - if self.batched_data_model is not None: - self.gradients_batched() - - # Pad gradients to receive update tensors that match - # the shape of model_vars.params. - if train_loc: - if train_scale: - if self.batched_data_model is not None: - gradients_batch = self.gradients_batch_raw - gradients_full = self.gradients_full_raw - else: - if self.batched_data_model is not None: - gradients_batch = tf.concat([ - self.gradients_batch_raw, - tf.zeros_like(self.model_vars.b_var) - ], axis=0) - gradients_full = tf.concat([ - self.gradients_full_raw, - tf.zeros_like(self.model_vars.b_var) - ], axis=0) - else: - if self.batched_data_model is not None: - gradients_batch = tf.concat([ - tf.zeros_like(self.model_vars.a_var), - self.gradients_batch_raw - ], axis=0) - gradients_full = tf.concat([ - tf.zeros_like(self.model_vars.a_var), - self.gradients_full_raw - ], axis=0) - else: - # These gradients are returned for convergence evaluation. - # In this case, closed form estimates were used, one could - # still evaluate the gradients here but we do not do - # this to speed up run time. - if self.batched_data_model is not None: - gradients_batch = tf.zeros_like(self.model_vars.params) - gradients_full = tf.zeros_like(self.model_vars.params) - - # Save attributes necessary for reinitialization: - self.train_loc = train_loc - self.train_scale = train_scale - - self.gradients_full = gradients_full - if self.batched_data_model is not None: - self.gradients_batch = gradients_batch - else: - self.gradients_batch = None - - def gradients_full(self): - gradients_full = tf.transpose(self.full_data_model.neg_jac_train) - self.gradients_full_raw = gradients_full - - def gradients_batched(self): - gradients_batch = tf.transpose(self.batched_data_model.neg_jac_train) - self.gradients_batch_raw = gradients_batch - - -class NewtonGraphGLM: - """ - Define update vectors which require a matrix inversion: Newton-Raphson and - IRLS updates. - - Define newton-type updates and gradients depending on whether data is batched. - This has to be distinguished as there are different jacobians - and hessians for the full and the batched data. 
- """ - model_vars: tf.Tensor - full_data_model: FullDataModelGraphGLM - batched_data_model: BatchedDataModelGraphGLM - - nr_update_full: Union[tf.Tensor, None] - nr_update_batched: Union[tf.Tensor, None] - nr_tr_update_full: Union[tf.Tensor, None] - nr_tr_update_batched: Union[tf.Tensor, None] - - irls_update_full: Union[tf.Tensor, None] - irls_update_batched: Union[tf.Tensor, None] - irls_tr_update_full: Union[tf.Tensor, None] - irls_tr_update_batched: Union[tf.Tensor, None] - - nr_tr_radius: Union[tf.Variable, None] - nr_tr_pred_cost_gain_full: Union[tf.Tensor, None] - nr_tr_pred_cost_gain_batched: Union[tf.Tensor, None] - - irls_tr_radius: Union[tf.Variable, None] - irls_tr_pred_cost_gain_full: Union[tf.Tensor, None] - irls_tr_pred_cost_gain_batched: Union[tf.Tensor, None] - - def __init__( - self, - provide_optimizers, - train_mu, - train_r, - dtype - ): - if train_mu or train_r: - if provide_optimizers["nr"] or provide_optimizers["nr_tr"]: - if self.batched_data_model is None: - batched_lhs = None - batched_rhs = None - else: - batched_lhs = self.batched_data_model.neg_hessians_train - batched_rhs = self.batched_data_model.neg_jac_train - - nr_update_full_raw, nr_update_batched_raw = self.build_updates_nr( - full_lhs=self.full_data_model.neg_hessians_train, - batched_lhs=batched_lhs, - full_rhs=self.full_data_model.neg_jac_train, - batched_rhs=batched_rhs, - psd=False - ) - nr_update_full, nr_update_batched = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - update_full_raw=nr_update_full_raw, - update_batched_raw=nr_update_batched_raw - ) - - self.nr_tr_x_step_full = tf.Variable(tf.zeros_like(nr_update_full)) - if self.batched_data_model is None: - self.nr_tr_x_step_batched = None - else: - self.nr_tr_x_step_batched = tf.Variable(tf.zeros_like(nr_update_batched)) - else: - nr_update_full = None - nr_update_batched = None - - if provide_optimizers["nr_tr"]: - self.nr_tr_radius = tf.Variable( - np.zeros(shape=[self.model_vars.n_features]) + pkg_constants.TRUST_REGION_RADIUS_INIT, - dtype=dtype - ) - self.nr_tr_ll_prev_full = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - self.nr_tr_pred_gain_full = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - - if self.batched_data_model is None: - self.nr_tr_ll_prev_batched = None - self.nr_tr_pred_gain_batched = None - else: - self.nr_tr_ll_prev_batched = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - self.nr_tr_pred_gain_batched = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - - n_obs = tf.cast(self.full_data_model.num_observations, dtype=dtype) - - nr_tr_proposed_vector_full = self.trust_region_newton_update( - update_raw=nr_update_full_raw, - radius_container=self.nr_tr_radius, - n_obs=self.num_observations_tf - ) - nr_tr_pred_cost_gain_full = self.trust_region_newton_cost_gain( - proposed_vector=nr_tr_proposed_vector_full, - neg_jac=self.full_data_model.neg_jac_train, - hessian_fim=self.full_data_model.neg_hessians_train, - n_obs=self.num_observations_tf - ) - - if self.batched_data_model is not None: - nr_tr_proposed_vector_batched = self.trust_region_newton_update( - update_raw=nr_update_batched_raw, - radius_container=self.nr_tr_radius, - n_obs=self.batch_size_tf - ) - nr_tr_pred_cost_gain_batched = self.trust_region_newton_cost_gain( - proposed_vector=nr_tr_proposed_vector_full, - neg_jac=self.batched_data_model.neg_jac_train, - hessian_fim=self.batched_data_model.neg_hessians_train, - n_obs=self.batch_size_tf - ) - else: - nr_tr_pred_cost_gain_batched = None - 
nr_tr_proposed_vector_batched = None - - nr_tr_proposed_vector_full_pad, nr_tr_proposed_vector_batched_pad = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - update_full_raw=nr_tr_proposed_vector_full, - update_batched_raw=nr_tr_proposed_vector_batched - ) - - train_ops_nr_tr_full = self.trust_region_ops( - likelihood_container=self.nr_tr_ll_prev_full, - proposed_vector=nr_tr_proposed_vector_full_pad, - proposed_vector_container=self.nr_tr_x_step_full, - proposed_gain=nr_tr_pred_cost_gain_full, - proposed_gain_container=self.nr_tr_pred_gain_full, - radius_container=self.nr_tr_radius, - dtype=dtype - ) - if self.batched_data_model is not None: - train_ops_nr_tr_batched = self.trust_region_ops( - likelihood_container=self.nr_tr_ll_prev_batched, - proposed_vector=nr_tr_proposed_vector_batched_pad, - proposed_vector_container=self.nr_tr_x_step_batched, - proposed_gain=nr_tr_pred_cost_gain_batched, - proposed_gain_container=self.nr_tr_pred_gain_batched, - radius_container=self.nr_tr_radius, - dtype=dtype - ) - else: - train_ops_nr_tr_batched = None - else: - train_ops_nr_tr_full = None - train_ops_nr_tr_batched = None - self.nr_tr_radius = tf.Variable(np.array([np.inf]), dtype=dtype) - - if provide_optimizers["irls"] or provide_optimizers["irls_tr"] or \ - provide_optimizers["irls_gd"] or provide_optimizers["irls_gd_tr"]: - # Compute a and b model updates separately. - if train_mu: - # The FIM of the mean model is guaranteed to be - # positive semi-definite and can therefore be inverted - # with the Cholesky decomposition. This information is - # passed here with psd=True. - if self.batched_data_model is None: - batched_lhs = None - batched_rhs = None - else: - batched_lhs = self.batched_data_model.fim_a - batched_rhs = self.batched_data_model.neg_jac_a - - irls_update_a_full, irls_update_a_batched = self.build_updates_nr( - full_lhs=self.full_data_model.fim_a, - batched_lhs=batched_lhs, - full_rhs=self.full_data_model.neg_jac_a, - batched_rhs=batched_rhs, - psd=True - ) - else: - irls_update_a_full = None - irls_update_a_batched = None - - if train_r: - if self.batched_data_model is None: - batched_lhs = None - batched_rhs = None - else: - batched_lhs = self.batched_data_model.fim_b - batched_rhs = self.batched_data_model.neg_jac_b - if provide_optimizers["irls"] or provide_optimizers["irls_tr"]: - irls_update_b_full, irls_update_b_batched = self.build_updates_nr( - full_lhs=self.full_data_model.fim_b, - batched_lhs=batched_lhs, - full_rhs=self.full_data_model.neg_jac_b, - batched_rhs=batched_rhs, - psd=False - ) - else: - irls_update_b_full = None - irls_update_b_batched = None - if provide_optimizers["irls_gd"] or provide_optimizers["irls_gd_tr"]: - if self.batched_data_model is not None: - batched_jac = self.batched_data_model.neg_jac_b - else: - batched_jac = None - irls_gd_update_b_full, irls_gd_update_b_batched = self.build_updates_gd( - full_jac=self.full_data_model.neg_jac_b, - batched_jac=batched_jac, - ) - else: - irls_gd_update_b_full = None - irls_gd_update_b_batched = None - else: - irls_update_b_full = None - irls_update_b_batched = None - irls_gd_update_b_full = None - irls_gd_update_b_batched = None - - if provide_optimizers["irls"]: - if train_mu and train_r: - irls_update_full_raw = tf.concat([irls_update_a_full, irls_update_b_full], axis=0) - if self.batched_data_model is not None: - irls_update_batched_raw = tf.concat([irls_update_a_batched, irls_update_b_batched], axis=0) - else: - irls_update_batched_raw = None - elif train_mu: - irls_update_full_raw = 
irls_update_a_full - if self.batched_data_model is not None: - irls_update_batched_raw = irls_update_a_batched - else: - irls_update_batched_raw = None - elif train_r: - irls_update_full_raw = irls_update_b_full - if self.batched_data_model is not None: - irls_update_batched_raw = irls_update_b_batched - else: - irls_update_batched_raw = None - else: - irls_update_full_raw = None - if self.batched_data_model is not None: - irls_update_batched_raw = None - else: - irls_update_batched_raw = None - - irls_update_full, irls_update_batched = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - update_full_raw=irls_update_full_raw, - update_batched_raw=irls_update_batched_raw - ) - - self.irls_tr_x_step_full = tf.Variable(tf.zeros_like(irls_update_full)) - if self.batched_data_model is None: - self.irls_tr_x_step_batched = None - else: - self.irls_tr_x_step_batched = tf.Variable(tf.zeros_like(irls_update_full)) - else: - irls_update_full = None - irls_update_batched = None - - if provide_optimizers["irls_gd"]: - if train_mu and train_r: - irls_gd_update_full_raw = tf.concat([irls_update_a_full, irls_gd_update_b_full], axis=0) - if self.batched_data_model is not None: - irls_gd_update_batched_raw = tf.concat([irls_update_a_batched, irls_gd_update_b_batched], axis=0) - else: - irls_gd_update_batched_raw = None - elif train_mu: - irls_gd_update_full_raw = irls_update_a_full - if self.batched_data_model is not None: - irls_gd_update_batched_raw = irls_update_a_batched - else: - irls_gd_update_batched_raw = None - elif train_r: - irls_gd_update_full_raw = irls_gd_update_b_full - if self.batched_data_model is not None: - irls_gd_update_batched_raw = irls_gd_update_b_batched - else: - irls_gd_update_batched_raw = None - else: - irls_gd_update_full_raw = None - if self.batched_data_model is not None: - irls_gd_update_batched_raw = None - else: - irls_gd_update_batched_raw = None - - irls_gd_update_full, irls_gd_update_batched = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - update_full_raw=irls_gd_update_full_raw, - update_batched_raw=irls_gd_update_batched_raw - ) - - self.irls_gd_tr_x_step_full = tf.Variable(tf.zeros_like(irls_gd_update_full)) - if self.batched_data_model is None: - self.irls_gd_tr_x_step_batched = None - else: - self.irls_gd_tr_x_step_batched = tf.Variable(tf.zeros_like(irls_gd_update_batched)) - else: - irls_gd_update_full = None - irls_gd_update_batched = None - - if provide_optimizers["irls_tr"] or provide_optimizers["irls_gd_tr"]: - self.irls_tr_radius = tf.Variable( - np.zeros(shape=[self.model_vars.n_features]) + pkg_constants.TRUST_REGION_RADIUS_INIT, - dtype=dtype - ) - self.irls_tr_ll_prev_full = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - self.irls_tr_pred_gain_full = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - - if self.batched_data_model is None: - self.irls_tr_ll_prev_batched = None - self.irls_tr_pred_gain_batched = None - else: - self.irls_tr_ll_prev_batched = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - self.irls_tr_pred_gain_batched = tf.Variable(np.zeros(shape=[self.model_vars.n_features])) - - if train_mu: - irls_tr_proposed_vector_full_a = self.trust_region_newton_update( - update_raw=irls_update_a_full, - radius_container=self.irls_tr_radius, - n_obs=self.num_observations_tf - ) - irls_tr_pred_cost_gain_full_a = self.trust_region_newton_cost_gain( - proposed_vector=irls_tr_proposed_vector_full_a, - neg_jac=self.full_data_model.neg_jac_a, - hessian_fim=self.full_data_model.fim_a, - 
n_obs=self.num_observations_tf - ) - else: - irls_tr_proposed_vector_full_a = None - irls_tr_pred_cost_gain_full_a = None - - if train_r: - if provide_optimizers["irls_tr"]: - irls_tr_proposed_vector_full_b = self.trust_region_newton_update( - update_raw=irls_update_b_full, - radius_container=self.irls_tr_radius, - n_obs=self.num_observations_tf - ) - irls_tr_pred_cost_gain_full_b = self.trust_region_newton_cost_gain( - proposed_vector=irls_tr_proposed_vector_full_b, - neg_jac=self.full_data_model.neg_jac_b, - hessian_fim=self.full_data_model.fim_b, - n_obs=self.num_observations_tf - ) - else: - irls_tr_proposed_vector_full_b = None - irls_tr_pred_cost_gain_full_b = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_proposed_vector_full_b = self.trust_region_linear_update( - update_raw=irls_gd_update_b_full, - radius_container=self.irls_tr_radius, - n_obs=self.num_observations_tf - ) - irls_gd_tr_pred_cost_gain_full_b = self.trust_region_linear_cost_gain( - proposed_vector=irls_gd_tr_proposed_vector_full_b, - neg_jac=self.full_data_model.neg_jac_b, - n_obs=self.num_observations_tf - ) - else: - irls_gd_tr_proposed_vector_full_b = None - irls_gd_tr_pred_cost_gain_full_b = None - else: - irls_tr_proposed_vector_full_b = None - irls_tr_pred_cost_gain_full_b = None - irls_gd_tr_proposed_vector_full_b = None - irls_gd_tr_pred_cost_gain_full_b = None - - if self.batched_data_model is not None: - if train_mu: - irls_tr_proposed_vector_batched_a = self.trust_region_newton_update( - update_raw=irls_update_a_batched, - radius_container=self.irls_tr_radius, - n_obs=self.batch_size_tf - ) - irls_tr_pred_cost_gain_batched_a = self.trust_region_newton_cost_gain( - proposed_vector=irls_tr_proposed_vector_batched_a, - neg_jac=self.batched_data_model.neg_jac_a, - hessian_fim=self.batched_data_model.fim_a, - n_obs=self.batch_size_tf - ) - else: - irls_tr_proposed_vector_batched_a = None - irls_tr_pred_cost_gain_batched_a = None - - if train_r: - if provide_optimizers["irls_tr"]: - irls_tr_proposed_vector_batched_b = self.trust_region_newton_update( - update_raw=irls_update_b_batched, - radius_container=self.irls_tr_radius, - n_obs=self.batch_size_tf - ) - irls_tr_pred_cost_gain_batched_b = self.trust_region_newton_cost_gain( - proposed_vector=irls_tr_proposed_vector_batched_b, - neg_jac=self.batched_data_model.neg_jac_b, - hessian_fim=self.batched_data_model.fim_b, - n_obs=self.batch_size_tf - ) - else: - irls_tr_proposed_vector_batched_b = None - irls_tr_pred_cost_gain_batched_b = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_proposed_vector_batched_b = self.trust_region_linear_update( - update_raw=irls_gd_update_b_batched, - radius_container=self.irls_tr_radius, - n_obs=self.batch_size_tf - ) - irls_gd_tr_pred_cost_gain_batched_b = self.trust_region_linear_cost_gain( - proposed_vector=irls_gd_tr_proposed_vector_batched_b, - neg_jac=self.batched_data_model.neg_jac_b, - n_obs=self.batch_size_tf - ) - else: - irls_gd_tr_proposed_vector_batched_b = None - irls_gd_tr_pred_cost_gain_batched_b = None - else: - irls_tr_proposed_vector_batched_b = None - irls_tr_pred_cost_gain_batched_b = None - irls_gd_tr_proposed_vector_batched_b = None - irls_gd_tr_pred_cost_gain_batched_b = None - - if train_mu and train_r: - if provide_optimizers["irls_tr"]: - irls_tr_update_full_raw = tf.concat([irls_tr_proposed_vector_full_a, - irls_tr_proposed_vector_full_b], axis=0) - irls_tr_pred_cost_gain_full = tf.add(irls_tr_pred_cost_gain_full_a, - irls_tr_pred_cost_gain_full_b) - else: - irls_tr_update_full_raw 
= None - irls_tr_pred_cost_gain_full = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_update_full_raw = tf.concat([irls_tr_proposed_vector_full_a, - irls_gd_tr_proposed_vector_full_b], axis=0) - irls_gd_tr_pred_cost_gain_full = tf.add(irls_tr_pred_cost_gain_full_a, - irls_gd_tr_pred_cost_gain_full_b) - else: - irls_gd_tr_update_full_raw = None - irls_gd_tr_pred_cost_gain_full = None - - if self.batched_data_model is not None: - if provide_optimizers["irls_tr"]: - irls_tr_update_batched_raw = tf.concat([irls_tr_proposed_vector_batched_a, - irls_tr_proposed_vector_batched_b], axis=0) - irls_tr_pred_cost_gain_batched = tf.add(irls_tr_pred_cost_gain_batched_a, - irls_tr_pred_cost_gain_batched_b) - else: - irls_tr_update_batched_raw = None - irls_tr_pred_cost_gain_batched = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_update_batched_raw = tf.concat([irls_tr_proposed_vector_batched_a, - irls_gd_tr_proposed_vector_batched_b], axis=0) - irls_gd_tr_pred_cost_gain_batched = tf.add(irls_tr_pred_cost_gain_batched_a, - irls_gd_tr_pred_cost_gain_batched_b) - else: - irls_gd_tr_update_batched_raw = None - irls_gd_tr_pred_cost_gain_batched = None - else: - irls_tr_update_batched_raw = None - irls_gd_tr_update_batched_raw = None - irls_tr_pred_cost_gain_batched = None - irls_gd_tr_pred_cost_gain_batched = None - elif train_mu and not train_r: - irls_tr_update_full_raw = irls_tr_proposed_vector_full_a - irls_gd_tr_update_full_raw = irls_tr_proposed_vector_full_a - irls_tr_pred_cost_gain_full = irls_tr_pred_cost_gain_full_a - irls_gd_tr_pred_cost_gain_full = irls_tr_pred_cost_gain_full_a - if self.batched_data_model is not None: - irls_tr_update_batched_raw = irls_tr_proposed_vector_batched_a - irls_gd_tr_update_batched_raw = irls_tr_proposed_vector_batched_a - irls_tr_pred_cost_gain_batched = irls_tr_pred_cost_gain_batched_a - irls_gd_tr_pred_cost_gain_batched = irls_tr_pred_cost_gain_batched_a - else: - irls_tr_update_batched_raw = None - irls_gd_tr_update_batched_raw = None - irls_tr_pred_cost_gain_batched = None - irls_gd_tr_pred_cost_gain_batched = None - elif not train_mu and train_r: - if provide_optimizers["irls_tr"]: - irls_tr_update_full_raw = irls_tr_proposed_vector_full_b - irls_tr_pred_cost_gain_full = irls_tr_pred_cost_gain_full_b - else: - irls_tr_update_full_raw = None - irls_tr_pred_cost_gain_full = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_update_full_raw = irls_gd_tr_proposed_vector_full_b - irls_gd_tr_pred_cost_gain_full = irls_gd_tr_pred_cost_gain_full_b - else: - irls_gd_tr_update_full_raw = None - irls_gd_tr_pred_cost_gain_full = None - - if self.batched_data_model is not None: - if provide_optimizers["irls_tr"]: - irls_tr_update_batched_raw = irls_tr_proposed_vector_batched_b - irls_tr_pred_cost_gain_batched = irls_tr_pred_cost_gain_batched_b - else: - irls_tr_update_batched_raw = None - irls_tr_pred_cost_gain_batched = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_update_batched_raw = irls_gd_tr_proposed_vector_batched_b - irls_gd_tr_pred_cost_gain_batched = irls_gd_tr_pred_cost_gain_batched_b - else: - irls_gd_tr_update_batched_raw = None - irls_gd_tr_pred_cost_gain_batched = None - else: - irls_tr_update_batched_raw = None - irls_gd_tr_update_batched_raw = None - irls_tr_pred_cost_gain_batched = None - irls_gd_tr_pred_cost_gain_batched = None - else: - assert False - - if provide_optimizers["irls_tr"]: - irls_tr_update_full, irls_tr_update_batched = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - 
update_full_raw=irls_tr_update_full_raw, - update_batched_raw=irls_tr_update_batched_raw - ) - else: - irls_tr_update_full = None - irls_tr_update_batched = None - - if provide_optimizers["irls_gd_tr"]: - irls_gd_tr_update_full, irls_gd_tr_update_batched = self.pad_updates( - train_mu=train_mu, - train_r=train_r, - update_full_raw=irls_gd_tr_update_full_raw, - update_batched_raw=irls_gd_tr_update_batched_raw - ) - else: - irls_gd_tr_update_full = None - irls_gd_tr_update_batched = None - - if provide_optimizers["irls_tr"] or provide_optimizers["irls_gd_tr"]: - self.irls_tr_x_step_full = tf.Variable(tf.zeros_like(self.model_vars.params)) - if self.batched_data_model is None: - self.irls_tr_x_step_batched = None - else: - self.irls_tr_x_step_batched = tf.Variable(tf.zeros_like(self.model_vars.params)) - else: - self.irls_tr_x_step_full = None - self.irls_tr_x_step_batched = None - - if provide_optimizers["irls_tr"]: - train_ops_irls_tr_full = self.trust_region_ops( - likelihood_container=self.irls_tr_ll_prev_full, - proposed_vector=irls_tr_update_full, - proposed_vector_container=self.irls_tr_x_step_full, - proposed_gain=irls_tr_pred_cost_gain_full, - proposed_gain_container=self.irls_tr_pred_gain_full, - radius_container=self.irls_tr_radius, - dtype=dtype - ) - if self.batched_data_model is not None: - train_ops_irls_tr_batched = self.trust_region_ops( - likelihood_container=self.irls_tr_ll_prev_batched, - proposed_vector=irls_tr_update_batched, - proposed_vector_container=self.irls_tr_x_step_batched, - proposed_gain=irls_tr_pred_cost_gain_batched, - proposed_gain_container=self.irls_tr_pred_gain_batched, - radius_container=self.irls_tr_radius, - dtype=dtype - ) - else: - train_ops_irls_tr_batched = None - else: - train_ops_irls_tr_full = None - train_ops_irls_tr_batched = None - - if provide_optimizers["irls_gd_tr"]: - train_ops_irls_gd_tr_full = self.trust_region_ops( - likelihood_container=self.irls_tr_ll_prev_full, - proposed_vector=irls_gd_tr_update_full, - proposed_vector_container=self.irls_tr_x_step_full, - proposed_gain=irls_gd_tr_pred_cost_gain_full, - proposed_gain_container=self.irls_tr_pred_gain_full, - radius_container=self.irls_tr_radius, - dtype=dtype - ) - if self.batched_data_model is not None: - train_ops_irls_gd_tr_batched = self.trust_region_ops( - likelihood_container=self.irls_tr_ll_prev_batched, - proposed_vector=irls_gd_tr_update_batched, - proposed_vector_container=self.irls_tr_x_step_batched, - proposed_gain=irls_gd_tr_pred_cost_gain_batched, - proposed_gain_container=self.irls_tr_pred_gain_batched, - radius_container=self.irls_tr_radius, - dtype=dtype - ) - else: - train_ops_irls_gd_tr_batched = None - else: - self.irls_gd_tr_x_step_full = None - self.irls_gd_tr_x_step_batched = None - train_ops_irls_gd_tr_full = None - train_ops_irls_gd_tr_batched = None - else: - train_ops_irls_tr_full = None - train_ops_irls_tr_batched = None - train_ops_irls_gd_tr_full = None - train_ops_irls_gd_tr_batched = None - self.irls_tr_radius = tf.Variable(np.array([np.inf]), dtype=dtype) - else: - nr_update_full = None - nr_update_batched = None - train_ops_nr_tr_full = None - train_ops_nr_tr_batched = None - - irls_update_full = None - irls_update_batched = None - irls_gd_update_full = None - irls_gd_update_batched = None - train_ops_irls_tr_full = None - train_ops_irls_tr_batched = None - train_ops_irls_gd_tr_full = None - train_ops_irls_gd_tr_batched = None - - self.nr_tr_radius = tf.Variable(np.array([np.inf]), dtype=dtype) - self.irls_tr_radius = tf.Variable(np.array([np.inf]), 
dtype=dtype) - - self.nr_update_full = nr_update_full - self.nr_update_batched = nr_update_batched - self.train_ops_nr_tr_full = train_ops_nr_tr_full - self.train_ops_nr_tr_batched = train_ops_nr_tr_batched - - self.irls_update_full = irls_update_full - self.irls_update_batched = irls_update_batched - self.irls_gd_update_full = irls_gd_update_full - self.irls_gd_update_batched = irls_gd_update_batched - self.train_ops_irls_tr_full = train_ops_irls_tr_full - self.train_ops_irls_tr_batched = train_ops_irls_tr_batched - self.train_ops_irls_gd_tr_full = train_ops_irls_gd_tr_full - self.train_ops_irls_gd_tr_batched = train_ops_irls_gd_tr_batched - - def build_updates_nr( - self, - full_lhs, - batched_rhs, - full_rhs, - batched_lhs, - psd - ): - update_full = self.newton_type_update( - lhs=full_lhs, - rhs=full_rhs, - psd=psd - ) - if batched_lhs is not None: - update_batched = self.newton_type_update( - lhs=batched_lhs, - rhs=batched_rhs, - psd=psd and pkg_constants.CHOLESKY_LSTSQS_BATCHED # This can be unstable even for fim_a. - ) - else: - update_batched = None - - return update_full, update_batched - - def build_updates_gd( - self, - full_jac, - batched_jac - ): - update_full = tf.transpose(full_jac) - if batched_jac is not None: - update_batched = tf.transpose(batched_jac) - else: - update_batched = None - - return update_full, update_batched - - def pad_updates( - self, - update_full_raw, - update_batched_raw, - train_mu, - train_r - ): - # Pad update vectors to receive update tensors that match - # the shape of model_vars.params. - if train_mu: - if train_r: - netwon_type_update_full = update_full_raw - newton_type_update_batched = update_batched_raw - else: - netwon_type_update_full = tf.concat([ - update_full_raw, - tf.zeros_like(self.model_vars.b_var) - ], axis=0) - if update_batched_raw is not None: - newton_type_update_batched = tf.concat([ - update_batched_raw, - tf.zeros_like(self.model_vars.b_var) - ], axis=0) - else: - newton_type_update_batched = None - elif train_r: - netwon_type_update_full = tf.concat([ - tf.zeros_like(self.model_vars.a_var), - update_full_raw - ], axis=0) - if update_batched_raw is not None: - newton_type_update_batched = tf.concat([ - tf.zeros_like(self.model_vars.a_var), - update_batched_raw - ], axis=0) - else: - newton_type_update_batched = None - else: - raise ValueError("No training necessary") - - return netwon_type_update_full, newton_type_update_batched - - def newton_type_update( - self, - lhs, - rhs, - psd - ): - delta_t = tf.squeeze(tf.linalg.lstsq( - lhs, - tf.expand_dims(rhs, axis=-1), - fast=psd and pkg_constants.CHOLESKY_LSTSQS - ), axis=-1) - update_tensor = tf.transpose(delta_t) - - return update_tensor - - def trust_region_newton_update( - self, - update_raw, - radius_container, - n_obs - ): - update_magnitude_sq = tf.reduce_sum(tf.square(update_raw), axis=0) - update_magnitude = tf.where( - condition=update_magnitude_sq > 0, - x=tf.sqrt(update_magnitude_sq), - y=tf.zeros_like(update_magnitude_sq) - ) - update_magnitude_inv = tf.where( - condition=update_magnitude > 0, - x=tf.divide( - tf.ones_like(update_magnitude), - update_magnitude - ), - y=tf.zeros_like(update_magnitude) - ) - update_norm = tf.multiply(update_raw,update_magnitude_inv) - update_scale = tf.minimum( - radius_container, - update_magnitude - ) - proposed_vector = tf.multiply( - update_norm, - update_scale - ) - - return proposed_vector - - def trust_region_linear_update( - self, - update_raw, - radius_container, - n_obs - ): - update_magnitude_sq = 
tf.reduce_sum(tf.square(update_raw), axis=0) - update_magnitude = tf.where( - condition=update_magnitude_sq > 0, - x=tf.sqrt(update_magnitude_sq), - y=tf.zeros_like(update_magnitude_sq) - ) - update_magnitude_inv = tf.where( - condition=update_magnitude > 0, - x=tf.divide( - tf.ones_like(update_magnitude), - update_magnitude - ), - y=tf.zeros_like(update_magnitude) - ) - update_norm = tf.multiply(update_raw,update_magnitude_inv) - update_scale = tf.minimum( - radius_container, - update_magnitude / n_obs # learning rate = 1 - ) - proposed_vector = tf.multiply( - update_norm, - update_scale - ) - - return proposed_vector - - def trust_region_newton_cost_gain( - self, - proposed_vector, - neg_jac, - hessian_fim, - n_obs - ): - pred_cost_gain = tf.add( - tf.einsum( - 'ni,in->n', - neg_jac, - proposed_vector - ) / n_obs, - 0.5 * tf.einsum( - 'nix,xin->n', - tf.einsum('inx,nij->njx', - tf.expand_dims(proposed_vector, axis=-1), - hessian_fim), - tf.expand_dims(proposed_vector, axis=0) - ) / tf.square(n_obs) - ) - return pred_cost_gain - - def trust_region_linear_cost_gain( - self, - proposed_vector, - neg_jac, - n_obs - ): - pred_cost_gain = tf.reduce_sum(tf.multiply( - proposed_vector, - tf.transpose(neg_jac) - ), axis=0) - return pred_cost_gain - - def trust_region_ops( - self, - likelihood_container, - proposed_vector, - proposed_vector_container, - proposed_gain, - proposed_gain_container, - radius_container, - dtype - ): - # Load hyper-parameters: - assert pkg_constants.TRUST_REGION_ETA0 < pkg_constants.TRUST_REGION_ETA1, \ - "eta0 must be smaller than eta1" - assert pkg_constants.TRUST_REGION_ETA1 <= pkg_constants.TRUST_REGION_ETA2, \ - "eta1 must be smaller than or equal to eta2" - assert pkg_constants.TRUST_REGION_T1 <= 1, "t1 must be smaller than 1" - assert pkg_constants.TRUST_REGION_T2 >= 1, "t1 must be larger than 1" - # Set trust region hyper-parameters - eta0 = tf.constant(pkg_constants.TRUST_REGION_ETA0, dtype=dtype) - eta1 = tf.constant(pkg_constants.TRUST_REGION_ETA1, dtype=dtype) - eta2 = tf.constant(pkg_constants.TRUST_REGION_ETA2, dtype=dtype) - t1 = tf.constant(pkg_constants.TRUST_REGION_T1, dtype=dtype) - t2 = tf.constant(pkg_constants.TRUST_REGION_T2, dtype=dtype) - upper_bound = tf.constant(pkg_constants.TRUST_REGION_UPPER_BOUND, dtype=dtype) - - # Phase I: Perform a trial update. - # Propose parameter update: - train_op_nr_tr_prev = tf.group( - tf.compat.v1.assign(likelihood_container, self.full_data_model.norm_neg_log_likelihood_eval1) - ) - train_op_x_step = tf.group( - tf.compat.v1.assign(proposed_vector_container, proposed_vector), - tf.compat.v1.assign(proposed_gain_container, proposed_gain) - ) - train_op_trial_update = tf.group( - tf.compat.v1.assign(self.model_vars.params, self.model_vars.params - proposed_vector) - ) - - # Phase II: Evaluate success of trial update and complete update cycle. - # Include parameter updates only if update improves cost function: - delta_f_actual = likelihood_container - self.full_data_model.norm_neg_log_likelihood_eval0 - delta_f_ratio = tf.divide(delta_f_actual, proposed_gain_container) - - # Compute parameter updates. 
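Aside on the removed trust-region machinery above: `newton_type_update` solves the per-feature linear system with `tf.linalg.lstsq`, `trust_region_newton_update` rescales that step so it never leaves the current trust region, and `trust_region_ops` accepts or rejects the trial step and adapts the radius from the ratio of actual to predicted gain. A minimal NumPy sketch of that cycle for a single feature, with illustrative hyper-parameter values standing in for the `pkg_constants` settings:

```python
import numpy as np

def newton_type_update(hessian, grad):
    """Solve H @ delta = grad in the least-squares sense (cf. tf.linalg.lstsq)."""
    return np.linalg.lstsq(hessian, grad, rcond=None)[0]

def clip_to_radius(step, radius):
    """Rescale the proposed step so its norm does not exceed the trust-region radius."""
    norm = np.linalg.norm(step)
    return step if norm <= radius or norm == 0.0 else step * (radius / norm)

def update_radius(radius, actual_gain, predicted_gain,
                  eta0=0.0, eta1=0.25, eta2=0.75, t1=0.5, t2=2.0, upper=1e3):
    """Shrink/keep/grow the radius from the ratio of actual to predicted improvement."""
    ratio = actual_gain / predicted_gain
    if actual_gain <= eta0 or ratio <= eta1:   # poor step: shrink
        return min(radius * t1, upper)
    if ratio > eta2:                           # very good step: grow
        return min(radius * t2, upper)
    return min(radius, upper)                  # acceptable step: keep

# Tiny example on a quadratic objective with known Hessian and gradient:
H = np.array([[2.0, 0.0], [0.0, 4.0]])
g = np.array([1.0, 2.0])
step = clip_to_radius(newton_type_update(H, g), radius=0.3)
print(step, np.linalg.norm(step))                                 # norm is capped at 0.3
print(update_radius(0.3, actual_gain=0.05, predicted_gain=0.04))  # 0.6 (radius grows)
```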
- update_theta = tf.logical_and(delta_f_actual > eta0, tf.logical_not(self.model_vars.converged)) - update_theta_numeric = tf.expand_dims(tf.cast(update_theta, dtype), axis=0) - keep_theta_numeric = tf.ones_like(update_theta_numeric) - update_theta_numeric - theta_new_nr_tr = tf.add( - tf.multiply(self.model_vars.params + proposed_vector_container, keep_theta_numeric), # old values - tf.multiply(self.model_vars.params, update_theta_numeric) # new values - ) - - train_op_update_params = tf.compat.v1.assign(self.model_vars.params, theta_new_nr_tr) - train_op_update_status = tf.compat.v1.assign(self.model_vars.updated, update_theta) - - # Update trusted region accordingly: - decrease_radius = tf.logical_or( - delta_f_actual <= eta0, - tf.logical_and(delta_f_ratio <= eta1, tf.logical_not(self.model_vars.converged)) - ) - increase_radius = tf.logical_and( - delta_f_actual > eta0, - tf.logical_and(delta_f_ratio > eta2, tf.logical_not(self.model_vars.converged)) - ) - keep_radius = tf.logical_and(tf.logical_not(decrease_radius), - tf.logical_not(increase_radius)) - radius_update = tf.add_n([ - tf.multiply(t1, tf.cast(decrease_radius, dtype)), - tf.multiply(t2, tf.cast(increase_radius, dtype)), - tf.multiply(tf.ones_like(t1), tf.cast(keep_radius, dtype)) - ]) - radius_new = tf.minimum(tf.multiply(radius_container, radius_update), upper_bound) - train_op_update_radius = tf.compat.v1.assign(radius_container, radius_new) - - train_ops = { - "update": proposed_vector_container, - "trial_op": tf.group( - train_op_nr_tr_prev, - train_op_x_step, - train_op_trial_update - ), - "update_op": tf.group( - train_op_update_params, - train_op_update_status, - train_op_update_radius - ) - } - - return train_ops - - -class TrainerGraphGLM: - """ - - """ - model_vars: ModelVarsGLM - model_vars_eval: ModelVarsGLM - - full_data_model: FullDataModelGraphGLM - batched_data_model: BatchedDataModelGraphGLM - - gradient_graph: GradientGraphGLM - gradients_batch: tf.Tensor - gradients_full: tf.Tensor - - nr_update_full: tf.Tensor - nr_update_batched: tf.Tensor - nr_tr_update_full: tf.Tensor - nr_tr_update_batched: tf.Tensor - irls_update_full: tf.Tensor - irls_update_batched: tf.Tensor - irls_tr_update_full: tf.Tensor - irls_tr_update_batched: tf.Tensor - - nr_tr_radius: Union[tf.Variable, None] - nr_tr_pred_cost_gain_full: Union[tf.Tensor, None] - nr_tr_pred_cost_gain_batched: Union[tf.Tensor, None] - - irls_tr_radius: Union[tf.Variable, None] - irls_tr_pred_cost_gain_full: Union[tf.Tensor, None] - irls_tr_pred_cost_gain_batched: Union[tf.Tensor, None] - - num_observations: int - num_features: int - num_design_loc_params: int - num_design_scale_params: int - num_loc_params: int - num_scale_params: int - batch_size: int - - session: tf.compat.v1.Session - graph: tf.Graph - - def __init__( - self, - provide_optimizers, - train_loc, - train_scale, - dtype - ): - with tf.name_scope("training_graphs"): - global_step = tf.compat.v1.train.get_or_create_global_step() - - if (train_loc or train_scale) and self.batched_data_model is not None: - logger.debug(" ** building batched trainers") - trainer_batch = train_utils.MultiTrainer( - variables=self.model_vars.params, - gradients=self.gradients_batch, - newton_delta=self.nr_update_batched, - irls_delta=self.irls_update_batched, - irls_gd_delta=self.irls_gd_update_batched, - train_ops_nr_tr=self.train_ops_nr_tr_batched, - train_ops_irls_tr=self.train_ops_irls_tr_batched, - train_ops_irls_gd_tr=self.train_ops_irls_gd_tr_batched, - learning_rate=self.learning_rate, - 
global_step=global_step, - apply_gradients=lambda grad: tf.where(tf.math.is_nan(grad), tf.zeros_like(grad), grad), - provide_optimizers=provide_optimizers, - name="batch_data_trainers" - ) - batch_gradient = trainer_batch.plain_gradient_by_variable(self.model_vars.params) - batch_gradient = tf.reduce_sum(tf.abs(batch_gradient), axis=0) - else: - trainer_batch = None - batch_gradient = None - - if train_loc or train_scale: - logger.debug(" ** building full trainers") - trainer_full = train_utils.MultiTrainer( - variables=self.model_vars.params, - gradients=self.gradients_full, - newton_delta=self.nr_update_full, - irls_delta=self.irls_update_full, - irls_gd_delta=self.irls_gd_update_full, - train_ops_nr_tr=self.train_ops_nr_tr_full, - train_ops_irls_tr=self.train_ops_irls_tr_full, - train_ops_irls_gd_tr=self.train_ops_irls_gd_tr_full, - learning_rate=self.learning_rate, - global_step=global_step, - apply_gradients=lambda grad: tf.where(tf.math.is_nan(grad), tf.zeros_like(grad), grad), - provide_optimizers=provide_optimizers, - name="full_data_trainers" - ) - full_gradient = trainer_full.plain_gradient_by_variable(self.model_vars.params) - full_gradient = tf.reduce_sum(tf.abs(full_gradient), axis=0) - else: - trainer_full = None - full_gradient = None - - # # ### BFGS implementation using SciPy L-BFGS - # with tf1.name_scope("bfgs"): - # feature_idx = tf1.placeholder(dtype="int64", shape=()) - # - # X_s = tf1.gather(X, feature_idx, axis=1) - # a_s = tf1.gather(a, feature_idx, axis=1) - # b_s = tf1.gather(b, feature_idx, axis=1) - # - # model = BasicModelGraph(X_s, design_loc, design_scale, a_s, b_s, size_factors=size_factors) - # - # trainer = tf1.contrib.opt.ScipyOptimizerInterface( - # model.loss, - # method='L-BFGS-B', - # options={'maxiter': maxiter}) - - self.global_step = global_step - - self.trainer_batch = trainer_batch - self.gradient = batch_gradient - - self.trainer_full = trainer_full - self.full_gradient = full_gradient - - self.train_op = None - - @abc.abstractmethod - def param_bounds(self): - pass - - -class EstimatorGraphGLM(TFEstimatorGraph, NewtonGraphGLM, TrainerGraphGLM): - """ - The estimator graphs are all graph necessary to perform parameter updates and to - summarise a current parameter estimate. - - The estimator graph class is divided into the following major sub graphs: - - - The input pipeline: Feed data for parameter updates. - - - """ - X: Union[tf.Tensor, tf.SparseTensor] - - a_var: tf.Tensor - b_var: tf.Tensor - - model_vars: ModelVarsGLM - model_vars_eval: ModelVarsGLM - - noise_model: str - - def __init__( - self, - num_observations: int, - num_features: int, - num_design_loc_params: int, - num_design_scale_params: int, - num_loc_params: int, - num_scale_params: int, - graph: tf.Graph, - batch_size: int, - constraints_loc: np.ndarray, - constraints_scale: np.ndarray, - dtype: str - ): - """ - - :param num_observations: int - Number of observations. - :param num_features: int - Number of features. - :param num_design_loc_params: int - Number of parameters per feature in mean model. - :param num_design_scale_params: int - Number of parameters per feature in scale model. - :param graph: tf1.Graph - :param constraints_loc: tensor (all parameters x dependent parameters) or None - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the mean model. - This form of constraints is used in vector generalized linear models (VGLMs). 
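Side note on the constraint tensors described above: they map the independent coefficients that are actually optimized onto the complete coefficient set (the removed `ModelVarsGLM` further down applies them as `tf.matmul(constraints_loc, a_var)`). A small NumPy sketch, assuming a sum-to-zero constraint as an example encoding:

```python
import numpy as np

# Assumed example: three coefficients whose effects must sum to zero, so the last
# one is minus the sum of the two independent coefficients.
constraints_loc = np.array([
    [ 1.0,  0.0],
    [ 0.0,  1.0],
    [-1.0, -1.0],
])

a_var = np.array([[0.5,  1.0],   # independent coefficients (2 coefficients x 2 features)
                  [2.0, -1.0]])
a = constraints_loc @ a_var      # complete coefficient set  (3 coefficients x 2 features)
print(a)                         # last row equals -(row0 + row1) for every feature
```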
- Assumed to be an identity matrix if None. - :param constraints_scale: tensor (all parameters x dependent parameters) or None - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the dispersion model. - This form of constraints is used in vector generalized linear models (VGLMs). - Assumed to be an identity matrix if None. - """ - TFEstimatorGraph.__init__( - self=self, - graph=graph - ) - - self.num_observations = num_observations - self.num_observations_tf = tf.cast(num_observations, dtype=dtype) - self.num_features = num_features - self.num_design_loc_params = num_design_loc_params - self.num_design_scale_params = num_design_scale_params - self.num_loc_params = num_loc_params - self.num_scale_params = num_scale_params - self.batch_size = batch_size - self.batch_size_tf = tf.cast(batch_size, dtype=dtype) - - self.constraints_loc = self._set_constraints( - constraints=constraints_loc, - num_design_params=self.num_design_loc_params, - dtype=dtype - ) - self.constraints_scale = self._set_constraints( - constraints=constraints_scale, - num_design_params=self.num_design_scale_params, - dtype=dtype - ) - - self.learning_rate = tf.compat.v1.placeholder(dtype, shape=(), name="learning_rate") - - def _run_trainer_init( - self, - provide_optimizers, - train_loc, - train_scale, - dtype - ): - logger.debug(" * building gradient graph") - self.gradient_graph = GradientGraphGLM( - model_vars=self.model_vars, - full_data_model=self.full_data_model, - batched_data_model=self.batched_data_model, - train_loc=train_loc, - train_scale=train_scale - ) - self.gradients_batch = self.gradient_graph.gradients_batch - self.gradients_full = self.gradient_graph.gradients_full - - logger.debug(" * building newton-type update graph") - NewtonGraphGLM.__init__( - self=self, - provide_optimizers=provide_optimizers, - train_mu=train_loc, - train_r=train_scale, - dtype=dtype - ) - - logger.debug(" * building trainers") - TrainerGraphGLM.__init__( - self=self, - provide_optimizers=provide_optimizers, - train_loc=train_loc, - train_scale=train_scale, - dtype=dtype - ) - - with tf.name_scope("init_op"): - self.init_op = tf.compat.v1.global_variables_initializer() - self.init_ops = [] - - def _set_out_var( - self, - feature_isnonzero, - dtype - ): - # ### output values: - # override all-zero features with lower bound coefficients - with tf.name_scope("output"): - logger.debug(" ** Build training graph: output") - bounds_min, bounds_max = self.param_bounds(dtype) - - param_nonzero_a_var = tf.broadcast_to(feature_isnonzero, [self.num_loc_params, self.num_features]) - alt_a = tf.broadcast_to(bounds_min["a_var"], [self.num_loc_params, self.num_features]) - a_var = tf.where( - param_nonzero_a_var, - self.model_vars.a_var, - alt_a - ) - - param_nonzero_b_var = tf.broadcast_to(feature_isnonzero, [self.num_scale_params, self.num_features]) - alt_b = tf.broadcast_to(bounds_min["b_var"], [self.num_scale_params, self.num_features]) - b_var = tf.where( - param_nonzero_b_var, - self.model_vars.b_var, - alt_b - ) - - self.a_var = a_var - self.b_var = b_var - - def _set_constraints( - self, - constraints, - num_design_params, - dtype - ): - if constraints is None: - return None - #return tf1.eye( - # num_rows=tf1.constant(num_design_params, shape=(), dtype="int32"), - # dtype=dtype - #) - else: - # Check if identity was supplied: - if constraints.shape[0] == constraints.shape[1]: - if np.sum(constraints - 
np.eye(constraints.shape[0], dtype=constraints.dtype)) < 1e-12: - return None - - assert constraints.shape[0] == num_design_params, "constraint dimension mismatch" - return tf.cast(constraints, dtype=dtype) - - @abc.abstractmethod - def param_bounds(self): - pass diff --git a/batchglm/train/tf1/base_glm/external.py b/batchglm/train/tf1/base_glm/external.py deleted file mode 100644 index aea90c59..00000000 --- a/batchglm/train/tf1/base_glm/external.py +++ /dev/null @@ -1,3 +0,0 @@ -import batchglm.train.tf1.train as train_utils -from batchglm.train.tf1.base import ProcessModelBase, TFEstimatorGraph -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/base_glm/fim.py b/batchglm/train/tf1/base_glm/fim.py deleted file mode 100644 index 6edbb3af..00000000 --- a/batchglm/train/tf1/base_glm/fim.py +++ /dev/null @@ -1,67 +0,0 @@ -import abc -import logging - -logger = logging.getLogger(__name__) - - -class FIMGLM: - """ - Compute expected fisher information matrix (FIM) - for iteratively re-weighted least squares (IWLS or IRLS) parameter updates for GLMs. - """ - - @abc.abstractmethod - def fim_a_analytic( - self, - model - ): - pass - - @abc.abstractmethod - def fim_b_analytic( - self, - model - ): - pass - - @abc.abstractmethod - def _weight_fim_aa( - self, - loc, - scale - ): - """ - Compute for mean model IWLS update for a GLM. - - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return tuple of tf1.tensors - Constants with respect to coefficient index for - Fisher information matrix and score function computation. - """ - pass - - @abc.abstractmethod - def _weight_fim_bb( - self, - loc, - scale - ): - """ - Compute for dispersion model IWLS update for a GLM. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return tuple of tf1.tensors - Constants with respect to coefficient index for - Fisher information matrix and score function computation. - """ - pass \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm/hessians.py b/batchglm/train/tf1/base_glm/hessians.py deleted file mode 100644 index 60d90707..00000000 --- a/batchglm/train/tf1/base_glm/hessians.py +++ /dev/null @@ -1,100 +0,0 @@ -import abc -import logging - -import tensorflow as tf - -logger = logging.getLogger(__name__) - -class HessiansGLM: - """ - Wrapper to compute the Hessian matrix for a GLM. - """ - - def hessian_analytic( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def hessian_tf( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - @abc.abstractmethod - def _weight_hessian_aa( - self, - X, - loc, - scale - ): - """ - Compute the coefficient index invariant part of the - mean model block of the hessian. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return const: tf1.tensor observations x features - Coefficient invariant terms of hessian of - given observations and features. 
- """ - pass - - @abc.abstractmethod - def _weight_hessian_bb( - self, - X, - loc, - scale - ): - """ - Compute the coefficient index invariant part of the - dispersion model block of the hessian. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return const: tf1.tensor observations x features - Coefficient invariant terms of hessian of - given observations and features. - """ - pass - - @abc.abstractmethod - def _weight_hessian_ab( - self, - X, - loc, - scale - ): - """ - Compute the coefficient index invariant part of the - mean-dispersion model block of the hessian. - - Note that there are two blocks of the same size which can - be compute from each other with a transpose operation as - the hessian is symmetric. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return const: tf1.tensor observations x features - Coefficient invariant terms of hessian of - given observations and features. - """ - pass - diff --git a/batchglm/train/tf1/base_glm/jacobians.py b/batchglm/train/tf1/base_glm/jacobians.py deleted file mode 100644 index 1eeab1d7..00000000 --- a/batchglm/train/tf1/base_glm/jacobians.py +++ /dev/null @@ -1,72 +0,0 @@ -import abc -import logging - -import tensorflow as tf - -logger = logging.getLogger(__name__) - - -class JacobiansGLM: - """ - Compute the Jacobian matrix for a GLM. - """ - - def jac_analytic( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def jac_tf( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - @abc.abstractmethod - def _weights_jac_a( - self, - X, - loc, - scale - ): - """ - Compute the coefficient index invariant part of the - mean model gradient. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return const: tf1.tensor observations x features - Coefficient invariant terms of hessian of - given observations and features. - """ - pass - - @abc.abstractmethod - def _weights_jac_b( - self, - X, - loc, - scale - ): - """ - Compute the coefficient index invariant part of the - dispersion model gradient. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - - :return const: tf1.tensor observations x features - Coefficient invariant terms of hessian of - given observations and features. 
- """ - pass \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm/model.py b/batchglm/train/tf1/base_glm/model.py deleted file mode 100644 index c978cbd0..00000000 --- a/batchglm/train/tf1/base_glm/model.py +++ /dev/null @@ -1,166 +0,0 @@ -import abc -import logging -from typing import Union - -import tensorflow as tf -import numpy as np - -from .external import ProcessModelBase - -logger = logging.getLogger(__name__) - - -class ProcessModelGLM(ProcessModelBase): - - @abc.abstractmethod - def param_bounds(self, dtype: str): - pass - - -class ModelVarsGLM(ProcessModelGLM): - """ Build tf1.Variables to be optimzed and their constraints. - - a_var and b_var slices of the tf1.Variable params which contains - all parameters to be optimized during model estimation. - Params is defined across both location and scale model so that - the hessian can be computed for the entire model. - a and b are the clipped parameter values which also contain - constraints and constrained dependent coefficients which are not - directly optimized. - """ - - a: tf.Tensor - b: tf.Tensor - a_var: tf.Variable - b_var: tf.Variable - params: tf.Variable - converged: np.ndarray - - def __init__( - self, - dtype: str, - init_a: np.ndarray, - init_b: np.ndarray, - constraints_loc: tf.Tensor, - constraints_scale: tf.Tensor - ): - """ - - :param dtype: Precision used in tensorflow. - :param init_a: nd.array (mean model size x features) - Initialisation for all parameters of mean model. - :param init_b: nd.array (dispersion model size x features) - Initialisation for all parameters of dispersion model. - :param constraints_loc: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the mean model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param constraints_scale: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the dispersion model. - This form of constraints is used in vector generalized linear models (VGLMs). - """ - self.init_a = tf.convert_to_tensor(init_a, dtype=dtype) - self.init_b = tf.convert_to_tensor(init_b, dtype=dtype) - - init_a_clipped = self.tf_clip_param(self.init_a, "a_var") - init_b_clipped = self.tf_clip_param(self.init_b, "b_var") - - # Param is the only tf1.Variable in the graph. - # a_var and b_var have to be slices of params. 
- self.params = tf.Variable(tf.concat( - [ - init_a_clipped, - init_b_clipped, - ], - axis=0 - ), name="params") - - # Feature batching code for future: - #idx_featurebatch = tf1.random_uniform([100], minval=0, maxval=self.params.shape[1]-1, dtype=tf1.int32) - #params_featurebatch = tf1.gather(self.params, indi [:,idx_featurebatch] - - #params_by_gene = [tf1.expand_dims(params[:, i], axis=-1) for i in range(params.shape[1])] - #a_by_gene = [x[0:init_a.shape[0],:] for x in params_by_gene] - #b_by_gene = [x[init_a.shape[0]:, :] for x in params_by_gene] - #a_var = tf1.concat(a_by_gene, axis=1) - #b_var = tf1.concat(b_by_gene, axis=1) - a_var = self.params[0:init_a.shape[0]] - b_var = self.params[init_a.shape[0]:] - - self.a_var = self.tf_clip_param(a_var, "a_var") - self.b_var = self.tf_clip_param(b_var, "b_var") - - if constraints_loc is not None: - self.a = tf.matmul(constraints_loc, self.a_var) - else: - self.a = self.a_var - - if constraints_scale is not None: - self.b = tf.matmul(constraints_scale, self.b_var) - else: - self.b = self.b_var - - # Properties to follow gene-wise convergence. - self.updated = tf.Variable(np.repeat(a=True, repeats=self.params.shape[1])) # Initialise to is updated. - self.converged = tf.Variable(np.repeat(a=False, repeats=self.params.shape[1])) # Initialise to non-converged. - self.convergence_status = tf.compat.v1.placeholder(shape=[self.params.shape[1]], dtype=tf.bool) - self.convergence_update = tf.compat.v1.assign(self.converged, self.convergence_status) - #self.params_by_gene = params_by_gene - #self.a_by_gene = a_by_gene - #self.b_by_gene = b_by_gene - - self.dtype = dtype - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - self.n_features = self.params.shape[1] - self.idx_train_loc = np.arange(0, init_a.shape[0]) - self.idx_train_scale = np.arange(init_a.shape[0], init_a.shape[0]+init_b.shape[0]) - - @abc.abstractmethod - def param_bounds(self, dtype): - pass - - -class BasicModelGraphGLM(ProcessModelGLM): - """ - - """ - X: Union[tf.Tensor, tf.SparseTensor] - design_loc: tf.Tensor - design_scale: tf.Tensor - constraints_loc: tf.Tensor - constraints_scale: tf.Tensor - - probs: tf.Tensor - log_likelihood: tf.Tensor - norm_log_likelihood: tf.Tensor - norm_neg_log_likelihood: tf.Tensor - loss: tf.Tensor - - @property - def probs(self): - probs = tf.exp(self.log_probs) - return self.tf_clip_param(probs, "probs") - - @property - def log_likelihood(self): - return tf.reduce_sum(self.log_probs, axis=0, name="log_likelihood") - - @property - def norm_log_likelihood(self): - return tf.reduce_mean(self.log_probs, axis=0, name="log_likelihood") - - @property - def norm_neg_log_likelihood(self): - return - self.norm_log_likelihood - - @property - def loss(self): - return tf.reduce_sum(self.norm_neg_log_likelihood) - - @abc.abstractmethod - def param_bounds(self, dtype): - pass diff --git a/batchglm/train/tf1/base_glm/reducible_tensors.py b/batchglm/train/tf1/base_glm/reducible_tensors.py deleted file mode 100644 index 45b7f753..00000000 --- a/batchglm/train/tf1/base_glm/reducible_tensors.py +++ /dev/null @@ -1,351 +0,0 @@ -import logging -from typing import Union - -import tensorflow as tf - -from batchglm.train.tf1.base_glm.model import ModelVarsGLM - -logger = logging.getLogger("batchglm") - - -class ReducableTensorsGLM: - """ - """ - - noise_model: str - constraints_loc: tf.Tensor - constraints_scale: tf.Tensor - model_vars: ModelVarsGLM - noise_model: str - compute_a: bool - compute_b: bool - - jac: Union[tf.Tensor, None] - 
jac_a: Union[tf.Tensor, None] - jac_b: Union[tf.Tensor, None] - neg_jac: tf.Tensor - neg_jac_a: Union[tf.Tensor, None] - neg_jac_b: Union[tf.Tensor, None] - - hessian: Union[tf.Tensor, None] - hessian_aa: Union[tf.Tensor, None] - hessian_bb: Union[tf.Tensor, None] - neg_hessian: Union[tf.Tensor, None] - neg_hessian_aa: Union[tf.Tensor, None] - neg_hessian_bb: Union[tf.Tensor, None] - - fim_a: Union[tf.Tensor, None] - fim_b: Union[tf.Tensor, None] - - neg_loglikelihood: Union[tf.Tensor, None] - - def __init__( - self, - model_vars: ModelVarsGLM, - noise_model: str, - constraints_loc, - constraints_scale, - sample_indices = None, - data_set: tf.data.Dataset = None, - data_batch: tf.Tensor = None, - mode_jac="analytic", - mode_hessian="analytic", - mode_fim="analytic", - compute_a=True, - compute_b=True, - compute_jac=True, - compute_hessian=True, - compute_fim=True, - compute_ll=True - ): - """ Return computational graph for jacobian based on mode choice. - - :param batched_data: - Dataset iterator over mini-batches of data (used for training) or tf1.Tensor of mini-batch. - :param sample_indices: Indices of samples to be used. - :param constraints_loc: np.ndarray (constraints on mean model x mean model parameters) - Constraints for location model. - Array with constraints in rows and model parameters in columns. - Each constraint contains non-zero entries for the a of parameters that - has to sum to zero. This constraint is enforced by binding one parameter - to the negative sum of the other parameters, effectively representing that - parameter as a function of the other parameters. This dependent - parameter is indicated by a -1 in this array, the independent parameters - of that constraint (which may be dependent at an earlier constraint) - are indicated by a 1. - :param constraints_scale: np.ndarray (constraints on mean model x mean model parameters) - Constraints for scale model. - Array with constraints in rows and model parameters in columns. - Each constraint contains non-zero entries for the a of parameters that - has to sum to zero. This constraint is enforced by binding one parameter - to the negative sum of the other parameters, effectively representing that - parameter as a function of the other parameters. This dependent - parameter is indicated by a -1 in this array, the independent parameters - of that constraint (which may be dependent at an earlier constraint) - are indicated by a 1. - :param mode: str - Mode by with which hessian is to be evaluated, - "analytic" uses a closed form solution of the jacobian, - "tf1" allows for evaluation of the jacobian via the tf1.gradients function. - :param iterator: bool - Whether an iterator or a tensor (single yield of an iterator) is given - in. - :param jac_a: bool - Wether to compute Jacobian for a parameters. If both jac_a and jac_b are true, - the entire jacobian is computed in self.jac. - :param jac_b: bool - Wether to compute Jacobian for b parameters. If both jac_a and jac_b are true, - the entire jacobian is computed in self.jac. 
- """ - assert data_set is None or data_batch is None - - self.noise_model = noise_model - self.model_vars = model_vars - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - - self.compute_a = compute_a - self.compute_b = compute_b - - self.mode_jac = mode_jac - self.mode_hessian = mode_hessian - self.mode_fim = mode_fim - - self.compute_jac = compute_jac - self.compute_hessian = compute_hessian - self.compute_fim_a = compute_fim and compute_a - self.compute_fim_b = compute_fim and compute_b - self.compute_ll = compute_ll - - n_var_all = self.model_vars.params.shape[0] - n_var_a = self.model_vars.a_var.shape[0] - n_var_b = self.model_vars.b_var.shape[0] - dtype = self.model_vars.dtype - self.dtype = dtype - - def map_fun(idx, data): - return self.assemble_tensors( - idx=idx, - data=data - ) - - def init_fun(): - if self.compute_a and self.compute_b: - n_var_train = n_var_all - elif self.compute_a and not self.compute_b: - n_var_train = n_var_a - elif not self.compute_a and self.compute_b: - n_var_train = n_var_b - else: - n_var_train = 0 - - if self.compute_jac and n_var_train > 0: - jac_init = tf.zeros([model_vars.n_features, n_var_train], dtype=dtype) - else: - jac_init = tf.zeros((), dtype=dtype) - - if self.compute_hessian and n_var_train > 0: - hessian_init = tf.zeros([model_vars.n_features, n_var_train, n_var_train], dtype=dtype) - else: - hessian_init = tf.zeros((), dtype=dtype) - - if self.compute_fim_a: - fim_a_init = tf.zeros([model_vars.n_features, n_var_a, n_var_a], dtype=dtype) - else: - fim_a_init = tf.zeros((), dtype=dtype) - if self.compute_fim_b: - fim_b_init = tf.zeros([model_vars.n_features, n_var_b, n_var_b], dtype=dtype) - else: - fim_b_init = tf.zeros((), dtype=dtype) - - if self.compute_ll: - ll_init = tf.zeros([model_vars.n_features], dtype=dtype) - else: - ll_init = tf.zeros((), dtype=dtype) - - return jac_init, hessian_init, fim_a_init, fim_b_init, ll_init - - def reduce_fun(old, new): - return (tf.add(old[0], new[0]), - tf.add(old[1], new[1]), - tf.add(old[2], new[2]), - tf.add(old[3], new[3]), - tf.add(old[4], new[4])) - - if data_set is not None: - set_op = data_set.reduce( - initial_state=init_fun(), - reduce_func=lambda old, new: reduce_fun(old, map_fun(new[0], new[1])) - ) - jac, hessian, fim_a, fim_b, ll = set_op - elif data_batch is not None: - set_op = map_fun( - idx=sample_indices, - data=data_batch - ) - jac, hessian, fim_a, fim_b, ll = set_op - else: - raise ValueError("supply either data_set or data_batch") - - p_shape_a = self.model_vars.a_var.shape[0] # This has to be _var to work with constraints. 
- - # With relay across tf1.Variable: - # Containers and specific slices and transforms: - if self.compute_a and self.compute_b: - if self.compute_jac: - self.jac = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_all], dtype=dtype), dtype=dtype) - self.jac_a = self.jac[:, :p_shape_a] - self.jac_b = self.jac[:, p_shape_a:] - else: - self.jac = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.jac_a = self.jac - self.jac_b = self.jac - self.jac_train = self.jac - - if self.compute_hessian: - self.hessian = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_all, n_var_all], dtype=dtype), dtype=dtype) - self.hessian_aa = self.hessian[:, :p_shape_a, :p_shape_a] - self.hessian_bb = self.hessian[:, p_shape_a:, p_shape_a:] - else: - self.hessian = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.hessian_aa = self.hessian - self.hessian_bb = self.hessian - self.hessian_train = self.hessian - - if self.compute_fim_a or self.compute_fim_b: - self.fim_a = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_a, n_var_a], dtype=dtype), dtype=dtype) - self.fim_b = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_b, n_var_b], dtype=dtype), dtype=dtype) - else: - self.fim_a = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.fim_b = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - elif self.compute_a and not self.compute_b: - if self.compute_jac: - self.jac = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_a], dtype=dtype), dtype=dtype) - self.jac_a = self.jac - else: - self.jac = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.jac_a = self.jac - self.jac_b = None - self.jac_train = self.jac_a - - if self.compute_hessian: - self.hessian = tf.Variable(tf.zeros([model_vars.n_features, n_var_a, n_var_a], dtype=dtype), dtype=dtype) - self.hessian_aa = self.hessian - else: - self.hessian = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.hessian_aa = self.hessian - self.hessian_bb = None - self.hessian_train = self.hessian_aa - - if self.compute_fim_a: - self.fim_a = tf.Variable(tf.zeros([model_vars.n_features, n_var_a, n_var_a], dtype=dtype), dtype=dtype) - else: - self.fim_a = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.fim_b = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - elif not self.compute_a and self.compute_b: - if self.compute_jac: - self.jac = tf.Variable(tf.zeros([self.model_vars.n_features, n_var_b], dtype=dtype), dtype=dtype) - self.jac_b = self.jac - else: - self.jac = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.jac_b = self.jac - self.jac_a = None - self.jac_train = self.jac_b - - if self.compute_hessian: - self.hessian = tf.Variable(tf.zeros([model_vars.n_features, n_var_b, n_var_b], dtype=dtype), dtype=dtype) - self.hessian_bb = self.hessian - else: - self.hessian = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.hessian_bb = self.hessian - self.hessian_aa = None - self.hessian_train = self.hessian_bb - - self.fim_a = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - if self.compute_fim_b: - self.fim_b = tf.Variable(tf.zeros([model_vars.n_features, n_var_b, n_var_b], dtype=dtype), dtype=dtype) - else: - self.fim_b = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - else: - self.jac = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.jac_a = None - self.jac_b = None - self.jac_train = None - - self.hessian = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.hessian_aa = None - self.hessian_bb = None - self.hessian_train = None - - 
self.fim_a = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - self.fim_b = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - - if self.compute_ll: - self.ll = tf.Variable(tf.zeros([model_vars.n_features], dtype=dtype), dtype=dtype) - else: - self.ll = tf.Variable(tf.zeros((), dtype=dtype), dtype=dtype) - - self.neg_jac = tf.negative(self.jac) if self.jac is not None else None - self.neg_jac_a = tf.negative(self.jac_a) if self.jac_a is not None else None - self.neg_jac_b = tf.negative(self.jac_b) if self.jac_b is not None else None - self.neg_jac_train = tf.negative(self.jac_train) if self.jac_train is not None else None - - self.neg_hessian = tf.negative(self.hessian) if self.hessian is not None else None - self.neg_hessian_aa = tf.negative(self.hessian_aa) if self.hessian_aa is not None else None - self.neg_hessian_bb = tf.negative(self.hessian_bb) if self.hessian_bb is not None else None - self.neg_hessian_train = tf.negative(self.hessian_train) if self.hessian_train is not None else None - - self.neg_ll = tf.negative(self.ll) if self.ll is not None else None - - # Setting operation: - jac_set = tf.compat.v1.assign(self.jac, jac) - hessian_set = tf.compat.v1.assign(self.hessian, hessian) - fim_a_set = tf.compat.v1.assign(self.fim_a, fim_a) - fim_b_set = tf.compat.v1.assign(self.fim_b, fim_b) - ll_set = tf.compat.v1.assign(self.ll, ll) - - self.set = tf.group( - set_op, - jac_set, - hessian_set, - fim_a_set, - fim_b_set, - ll_set - ) - - def assemble_tensors( - self, - idx, - data - ): - raise NotImplementedError() - - def jac_analytic( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def jac_tf( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def hessian_analytic( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def hessian_tf( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() - - def fim_analytic( - self, - model - ) -> tf.Tensor: - raise NotImplementedError() \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/README.md b/batchglm/train/tf1/base_glm_all/README.md deleted file mode 100644 index 730604f9..00000000 --- a/batchglm/train/tf1/base_glm_all/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Classes with conditinoal import statements that yield class properties of desired kind. -For example: EstimatorGraph receives a child of BasicModelGraphGLM appropriate for the desired noise model. This is necessary in a separate module as the lengthy constructor calls to the noise model specific children of GLM classes would otherwise have to be repeated in the class definition of each noise model. 
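The pattern this README describes, choosing the noise-model-specific class at construction time via a conditional import, boils down to a dispatch like the following sketch (placeholder classes stand in for the real per-noise-model modules):

```python
class NBReducibleTensors: ...
class NormReducibleTensors: ...
class BetaReducibleTensors: ...

def reducible_tensors_for(noise_model: str):
    """Return the class for the requested noise model, mirroring the removed
    `if noise_model == "nb": from .external_nb import ...` blocks."""
    dispatch = {
        "nb": NBReducibleTensors,
        "norm": NormReducibleTensors,
        "beta": BetaReducibleTensors,
    }
    try:
        return dispatch[noise_model]
    except KeyError:
        raise ValueError("noise model %s was not recognized" % noise_model) from None

print(reducible_tensors_for("nb").__name__)  # NBReducibleTensors
```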
\ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/__init__.py b/batchglm/train/tf1/base_glm_all/__init__.py deleted file mode 100644 index 3b80760c..00000000 --- a/batchglm/train/tf1/base_glm_all/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .estimator import TFEstimatorGLM -from .estimator_graph import EstimatorGraphAll -from .fim import FIMGLMALL -from .jacobians import JacobiansGLMALL -from .hessians import HessianGLMALL -from .reducible_tensors import ReducableTensorsGLMALL \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/estimator.py b/batchglm/train/tf1/base_glm_all/estimator.py deleted file mode 100644 index 7c5cd720..00000000 --- a/batchglm/train/tf1/base_glm_all/estimator.py +++ /dev/null @@ -1,362 +0,0 @@ -import abc -from enum import Enum -import logging -import numpy as np -import scipy.sparse -import tensorflow as tf -from typing import Union - -from .estimator_graph import EstimatorGraphAll -from .external import _TFEstimator, InputDataGLM, _EstimatorGLM - - -class TFEstimatorGLM(_TFEstimator, _EstimatorGLM, metaclass=abc.ABCMeta): - """ - Estimator for Generalized Linear Models (GLMs). - """ - - class TrainingStrategy(Enum): - pass - - model: EstimatorGraphAll - _train_loc: bool - _train_scale: bool - - def __init__( - self, - input_data: InputDataGLM, - batch_size: int, - graph: tf.Graph, - init_a: Union[np.ndarray], - init_b: Union[np.ndarray], - model: EstimatorGraphAll, - provide_optimizers: dict, - provide_batched: bool, - provide_fim: bool, - provide_hessian: bool, - extended_summary, - noise_model: str, - dtype: str - ): - """ - Create a new estimator for a GLM-like model. - - :param input_data: InputData - The input data - :param batch_size: int - Size of mini-batches used. - :param graph: (optional) tf1.Graph - :param init_model: (optional) - If provided, this model will be used to initialize this Estimator. - :param init_a: np.ndarray - Initialization of 'a' (location) model. - :param init_b: np.ndarray - Initialization of 'b' (scale) model. - :param quick_scale: bool - Whether `scale` will be fitted faster and maybe less accurate. - :param model: EstimatorGraph - EstimatorGraph to use. Basically for debugging. - :param provide_optimizers: - - E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, - "nr": False, "nr_tr": True, "irls": False, "irls_tr": False} - :param provide_batched: bool - Whether mini-batched optimizers should be provided. - :param extended_summary: Include detailed information in the summaries. - Will increase runtime of summary writer, use only for debugging. - :param dtype: Precision used in tensorflow. 
- """ - if noise_model == "nb": - from .external_nb import EstimatorGraph - elif noise_model == "norm": - from .external_norm import EstimatorGraph - elif noise_model == "beta": - from .external_beta import EstimatorGraph - else: - raise ValueError("noise model %s was not recognized" % noise_model) - self.noise_model = noise_model - - # validate design matrix: - if np.linalg.matrix_rank(input_data.design_loc) != np.linalg.matrix_rank(input_data.design_loc.T): - raise ValueError("design_loc matrix is not full rank") - if np.linalg.matrix_rank(input_data.design_scale) != np.linalg.matrix_rank(input_data.design_scale.T): - raise ValueError("design_scale matrix is not full rank") - - # ### initialization - if model is None: - if graph is None: - graph = tf.Graph() - - # ### prepare fetch_fn: - def fetch_fn(idx): - r""" - Documentation of tensorflow coding style in this function: - tf1.py_func defines a python function (the getters of the InputData object slots) - as a tensorflow operation. Here, the shape of the tensor is lost and - has to be set with set_shape. For size factors, we use explicit broadcasting - as explained below. - """ - # Catch dimension collapse error if idx is only one element long, ie. 0D: - if len(idx.shape) == 0: - idx = tf.expand_dims(idx, axis=0) - - if isinstance(input_data.x, scipy.sparse.csr_matrix): - X_tensor_idx, X_tensor_val, X_shape = tf.py_function( - func=input_data.fetch_x_sparse, - inp=[idx], - Tout=[np.int64, np.float64, np.int64] - ) - # Note on Tout: np.float64 for val seems to be required to avoid crashing v1.12. - X_tensor_idx = tf.cast(X_tensor_idx, dtype=tf.int64) - X_shape = tf.cast(X_shape, dtype=tf.int64) - X_tensor_val = tf.cast(X_tensor_val, dtype=dtype) - X_tensor = (X_tensor_idx, X_tensor_val, X_shape) - else: - X_tensor = tf.py_function( - func=input_data.fetch_x_dense, - inp=[idx], - Tout=input_data.x.dtype - ) - X_tensor.set_shape(idx.get_shape().as_list() + [input_data.num_features]) - X_tensor = (tf.cast(X_tensor, dtype=dtype),) - - design_loc_tensor = tf.py_function( - func=input_data.fetch_design_loc, - inp=[idx], - Tout=input_data.design_loc.dtype - ) - design_loc_tensor.set_shape(idx.get_shape().as_list() + [input_data.num_design_loc_params]) - design_loc_tensor = tf.cast(design_loc_tensor, dtype=dtype) - - design_scale_tensor = tf.py_function( - func=input_data.fetch_design_scale, - inp=[idx], - Tout=input_data.design_scale.dtype - ) - design_scale_tensor.set_shape(idx.get_shape().as_list() + [input_data.num_design_scale_params]) - design_scale_tensor = tf.cast(design_scale_tensor, dtype=dtype) - - if input_data.size_factors is not None and noise_model in ["nb", "norm"]: - size_factors_tensor = tf.py_function( - func=input_data.fetch_size_factors, - inp=[idx], - Tout=input_data.size_factors.dtype - ) - size_factors_tensor.set_shape(idx.get_shape()) - size_factors_tensor = tf.expand_dims(size_factors_tensor, axis=-1) - size_factors_tensor = tf.cast(size_factors_tensor, dtype=dtype) - else: - size_factors_tensor = tf.constant(1, shape=[1, 1], dtype=dtype) - - size_factors_tensor = tf.broadcast_to(size_factors_tensor, - shape=[tf.size(idx), input_data.num_features]) - - # return idx, data - return idx, (X_tensor, design_loc_tensor, design_scale_tensor, size_factors_tensor) - - _TFEstimator.__init__( - self=self - ) - with graph.as_default(): - # create model - model = EstimatorGraph( - fetch_fn=fetch_fn, - feature_isnonzero=input_data.feature_isnonzero, - num_observations=input_data.num_observations, - 
num_features=input_data.num_features, - num_design_loc_params=input_data.num_design_loc_params, - num_design_scale_params=input_data.num_design_scale_params, - num_loc_params=input_data.num_loc_params, - num_scale_params=input_data.num_scale_params, - batch_size=np.min([batch_size, input_data.x.shape[0]]), - graph=graph, - init_a=init_a, - init_b=init_b, - constraints_loc=input_data.constraints_loc, - constraints_scale=input_data.constraints_scale, - provide_optimizers=provide_optimizers, - provide_batched=provide_batched, - provide_fim=provide_fim, - provide_hessian=provide_hessian, - train_loc=self._train_loc, - train_scale=self._train_scale, - extended_summary=extended_summary, - noise_model=self.noise_model, - dtype=dtype - ) - model.session = self.session - _EstimatorGLM.__init__( - self=self, - model=model, - input_data=input_data - ) - - def _scaffold(self): - with self.model.graph.as_default(): - scaffold = tf.compat.v1.train.Scaffold( - init_op=self.model.init_op, - summary_op=self.model.merged_summary, - saver=self.model.saver, - ) - return scaffold - - def train( - self, - *args, - learning_rate=None, - convergence_criteria="all_converged", - stopping_criteria=None, - train_loc: bool = None, - train_scale: bool = None, - use_batching=False, - optim_algo=None, - **kwargs - ): - r""" - Starts training of the model - - :param feed_dict: dict of values which will be feeded each `session.run()` - - See also feed_dict parameter of `session.run()`. - :param learning_rate: learning rate used for optimization - :param convergence_criteria: criteria after which the training will be interrupted. - Currently implemented criterias: - - - "step": - stop, when the step counter reaches `stopping_criteria` - :param stopping_criteria: Additional parameter for convergence criteria. - - See parameter `convergence_criteria` for exact meaning - :param train_loc: Set to True/False in order to enable/disable training of loc - :param train_scale: Set to True/False in order to enable/disable training of scale - :param use_batching: If True, will use mini-batches with the batch size defined in the constructor. - Otherwise, the gradient of the full dataset will be used. - :param optim_algo: name of the requested train op. - See :func:train_utils.MultiTrainer.train_op_by_name for further details. - """ - if train_loc is None: - # check if mu was initialized with MLE - train_loc = self._train_loc - if train_scale is None: - # check if r was initialized with MLE - train_scale = self._train_scale - - # Check whether newton-rhapson is desired: - require_hessian = False - require_fim = False - trustregion_mode = False - - if optim_algo.lower() == "newton" or \ - optim_algo.lower() == "newton_raphson" or \ - optim_algo.lower() == "nr": - require_hessian = True - - if optim_algo.lower() == "irls" or \ - optim_algo.lower() == "iwls" or \ - optim_algo.lower() == "irls_gd" or \ - optim_algo.lower() == "iwls_gd": - require_fim = True - - if optim_algo.lower() == "newton_tr" or \ - optim_algo.lower() == "nr_tr": - require_hessian = True - trustregion_mode = True - - if optim_algo.lower() == "irls_tr" or \ - optim_algo.lower() == "iwls_tr" or \ - optim_algo.lower() == "irls_gd_tr" or \ - optim_algo.lower() == "iwls_gd_tr": - require_fim = True - trustregion_mode = True - - # Set learning rate defaults if not set by user. 
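Side note: the chain of string comparisons above reduces to a small lookup — the optimizer name determines whether the trainer needs the Hessian (Newton-type updates), the Fisher information matrix (IRLS-type updates), and whether it runs in trust-region mode. As a standalone sketch using the same names the removed `train()` accepted:

```python
def optimizer_requirements(optim_algo: str):
    """Map an optimizer name to (require_hessian, require_fim, trustregion_mode)."""
    name = optim_algo.lower()
    require_hessian = name in {"newton", "newton_raphson", "nr", "newton_tr", "nr_tr"}
    require_fim = name in {"irls", "iwls", "irls_gd", "iwls_gd",
                           "irls_tr", "iwls_tr", "irls_gd_tr", "iwls_gd_tr"}
    trustregion_mode = name.endswith("_tr")
    return require_hessian, require_fim, trustregion_mode

print(optimizer_requirements("nr_tr"))    # (True, False, True)
print(optimizer_requirements("irls_gd"))  # (False, True, False)
```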
- if learning_rate is None: - if require_hessian or require_fim: - learning_rate = 1 - else: - learning_rate = 0.5 - - # Check that newton-rhapson is called properly: - if require_hessian or require_fim: - if learning_rate != 1: - logging.getLogger("batchglm").warning( - "Newton-rhapson or IRLS in base_glm_all is used with learning rate " + - str(learning_rate) + - ". Newton-rhapson and IRLS should only be used with learning rate = 1." - ) - - # Report all parameters after all defaults were imputed in settings: - logging.getLogger("batchglm").debug("Optimizer settings in base_glm_all Estimator.train():") - logging.getLogger("batchglm").debug("learning_rate " + str(learning_rate)) - logging.getLogger("batchglm").debug("convergence_criteria " + str(convergence_criteria)) - logging.getLogger("batchglm").debug("stopping_criteria " + str(stopping_criteria)) - logging.getLogger("batchglm").debug("train_loc " + str(train_loc)) - logging.getLogger("batchglm").debug("train_scale " + str(train_scale)) - logging.getLogger("batchglm").debug("use_batching " + str(use_batching)) - logging.getLogger("batchglm").debug("optim_algo " + str(optim_algo)) - if len(kwargs) > 0: - logging.getLogger("batchglm").debug("**kwargs: ") - logging.getLogger("batchglm").debug(kwargs) - - if train_loc or train_scale: - if use_batching: - train_op = self.model.trainer_batch.train_op_by_name(optim_algo) - else: - train_op = self.model.trainer_full.train_op_by_name(optim_algo) - - super()._train( - *args, - feed_dict={"learning_rate:0": learning_rate}, - convergence_criteria=convergence_criteria, - stopping_criteria=stopping_criteria, - train_op=train_op, - trustregion_mode=trustregion_mode, - require_hessian=require_hessian, - require_fim=require_fim, - is_batched=use_batching, - **kwargs - ) - - def finalize(self): - """ - Evaluate all tensors that need to be exported from session and save these as class attributes - and close session. - - Changes .model entry from tf1-based EstimatorGraph to numpy based Model instance and - transfers relevant attributes. 
- """ - self.session.run(self.model.full_data_model.final_set) - a_var = self.session.run(self.model.a_var) - b_var = self.session.run(self.model.b_var) - fisher_inv = self.session.run(self.model.fisher_inv) - hessian = self.session.run(self.model.hessian) - jacobian = self.session.run(self.model.gradients) - log_likelihood = self.session.run(self.model.log_likelihood) - loss = self.session.run(self.model.loss) - logging.getLogger("batchglm").debug("Closing session") - self.close_session() - self.model = self.get_model_container(self.input_data) - self.model._a_var = a_var - self.model._b_var = b_var - self._fisher_inv = fisher_inv - self._hessian = hessian - self._jacobian = jacobian - self._log_likelihood = log_likelihood - self._loss = loss - - @abc.abstractmethod - def get_model_container( - self, - input_data - ): - pass - - @abc.abstractmethod - def init_par( - self, - input_data, - init_a, - init_b, - init_model - ): - pass diff --git a/batchglm/train/tf1/base_glm_all/estimator_graph.py b/batchglm/train/tf1/base_glm_all/estimator_graph.py deleted file mode 100644 index 1a05d6a5..00000000 --- a/batchglm/train/tf1/base_glm_all/estimator_graph.py +++ /dev/null @@ -1,543 +0,0 @@ -import logging -import numpy as np -import tensorflow as tf -from typing import Union - -from .external import EstimatorGraphGLM, FullDataModelGraphGLM, BatchedDataModelGraphGLM, ModelVarsGLM -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class FullDataModelGraph(FullDataModelGraphGLM): - """ - Computational graph to evaluate GLM metrics on full data set. - - Evaluate model and cost function, Jacobians, Hessians and Fisher information matrix. - """ - - def __init__( - self, - num_observations, - sample_indices: tf.Tensor, - fetch_fn, - batch_size: Union[int, tf.Tensor], - model_vars: ModelVarsGLM, - constraints_loc, - constraints_scale, - noise_model, - train_a, - train_b, - compute_fim, - compute_hessian, - dtype - ): - """ - :param sample_indices: - TODO - :param fetch_fn: - TODO - :param batch_size: int - Size of mini-batches used. - :param model_vars: ModelVars - Variables of model. Contains tf1.Variables which are optimized. - :param constraints_loc: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the mean model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param constraints_scale: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the dispersion model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param train_mu: bool - Whether to train mean model. If False, the initialisation is kept. - :param train_r: bool - Whether to train dispersion model. If False, the initialisation is kept. - :param dtype: Precision used in tensorflow. 
- """ - if noise_model == "nb": - from .external_nb import ReducibleTensors - elif noise_model == "norm": - from .external_norm import ReducibleTensors - elif noise_model == "beta": - from .external_beta import ReducibleTensors - else: - raise ValueError("noise model not recognized") - self.noise_model = noise_model - - logger.debug("building input pipeline") - with tf.name_scope("input_pipeline"): - data_set = tf.data.Dataset.from_tensor_slices(sample_indices) - data_set = data_set.batch(batch_size) - data_set = data_set.map(fetch_fn, num_parallel_calls=pkg_constants.TF_NUM_THREADS) - - def map_sparse(idx, data): - X_tensor_ls, design_loc_tensor, design_scale_tensor, size_factors_tensor = data - if len(X_tensor_ls) > 1: - X_tensor = tf.SparseTensor(X_tensor_ls[0], X_tensor_ls[1], X_tensor_ls[2]) - X_tensor = tf.cast(X_tensor, dtype=dtype) - else: - X_tensor = X_tensor_ls[0] - return idx, (X_tensor, design_loc_tensor, design_scale_tensor, size_factors_tensor) - - data_set = data_set.map(map_sparse, num_parallel_calls=pkg_constants.TF_NUM_THREADS) - data_set = data_set.prefetch(1) - - with tf.name_scope("reducible_tensors_train"): - reducibles_train = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=sample_indices, - data_set=data_set, - data_batch=None, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=train_a, - compute_b=train_b, - compute_jac=True, - compute_hessian=compute_hessian, - compute_fim=compute_fim, - compute_ll=False - ) - self.neg_jac_train = reducibles_train.neg_jac_train - self.jac = reducibles_train.jac - self.neg_jac_a = reducibles_train.neg_jac_a - self.neg_jac_b = reducibles_train.neg_jac_b - self.jac_b = reducibles_train.jac_b - - self.hessians = reducibles_train.hessian - self.neg_hessians_train = reducibles_train.neg_hessian_train - - self.fim_a = reducibles_train.fim_a - self.fim_b = reducibles_train.fim_b - - self.train_set = reducibles_train.set - - with tf.name_scope("reducible_tensors_finalize"): - reducibles_finalize = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=sample_indices, - data_set=data_set, - data_batch=None, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=True, - compute_b=True, - compute_jac=True, - compute_hessian=True, - compute_fim=False, - compute_ll=True - ) - self.hessians_final = reducibles_finalize.hessian - self.neg_jac_final = reducibles_finalize.neg_jac - self.log_likelihood_final = reducibles_finalize.ll - self.loss_final = tf.reduce_sum(-self.log_likelihood_final / num_observations) - - self.final_set = reducibles_finalize.set - - with tf.name_scope("reducible_tensors_eval_ll"): - reducibles_eval0 = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=sample_indices, - data_set=data_set, - data_batch=None, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=train_a, - compute_b=train_b, - compute_jac=False, - compute_hessian=False, - compute_fim=False, - compute_ll=True - ) - self.log_likelihood_eval0 = reducibles_eval0.ll - self.norm_neg_log_likelihood_eval0 = 
-self.log_likelihood_eval0 / num_observations - self.loss_eval0 = tf.reduce_sum(self.norm_neg_log_likelihood_eval0) - - self.eval0_set = reducibles_eval0.set - - with tf.name_scope("reducible_tensors_eval_ll_jac"): - reducibles_eval1 = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=sample_indices, - data_set=data_set, - data_batch=None, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=train_a, - compute_b=train_b, - compute_jac=True, - compute_hessian=False, - compute_fim=False, - compute_ll=True - ) - self.log_likelihood_eval1 = reducibles_eval1.ll - self.norm_neg_log_likelihood_eval1 = -self.log_likelihood_eval1 / num_observations - self.loss_eval1 = tf.reduce_sum(self.norm_neg_log_likelihood_eval1) - self.neg_jac_train_eval = reducibles_eval1.neg_jac_train - - self.eval1_set = reducibles_eval1.set - - self.num_observations = num_observations - self.idx_train_loc = model_vars.idx_train_loc if train_a else np.array([]) - self.idx_train_scale = model_vars.idx_train_scale if train_b else np.array([]) - self.idx_train = np.sort(np.concatenate([self.idx_train_loc, self.idx_train_scale])) - - -class BatchedDataModelGraph(BatchedDataModelGraphGLM): - """ - Basic computational graph to evaluate GLM metrics on batched data set. - - Evaluate model and cost function and Jacobians, Hessians and Fisher information matrix. - """ - - def __init__( - self, - num_observations, - fetch_fn, - batch_size: Union[int, tf.Tensor], - buffer_size: int, - model_vars: ModelVarsGLM, - constraints_loc, - constraints_scale, - noise_model: str, - train_a, - train_b, - compute_fim, - compute_hessian, - dtype - ): - """ - :param fetch_fn: - TODO - :param batch_size: int - Size of mini-batches used. - :param model_vars: ModelVars - Variables of model. Contains tf1.Variables which are optimized. - :param constraints_loc: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the mean model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param constraints_scale: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the dispersion model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param dtype: Precision used in tensorflow. - """ - if noise_model == "nb": - from .external_nb import ReducibleTensors - elif noise_model == "norm": - from .external_norm import ReducibleTensors - elif noise_model == "beta": - from .external_beta import ReducibleTensors - else: - raise ValueError("noise model not recognized") - self.noise_model = noise_model - - with tf.name_scope("input_pipeline"): - data_set = tf.data.Dataset.from_tensor_slices(( - tf.range(num_observations, name="sample_index") - )) - data_set = data_set.shuffle(buffer_size=2 * batch_size) - data_set = data_set.repeat() - data_set = data_set.batch(batch_size, drop_remainder=True) - data_set = data_set.map(tf.contrib.framework.sort) # sort indices - TODO why? 
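Side note: the batched input pipeline built here is a standard `tf.data` pattern — shuffle the sample indices, repeat, draw fixed-size batches, sort the indices within each batch, then fetch and prefetch the actual data. Re-expressed with current TF 2.x ops (`tf.sort` replaces the removed `tf.contrib.framework.sort`; the fetch and prefetch steps that follow below are omitted):

```python
import tensorflow as tf

num_observations, batch_size = 20, 6

ds = tf.data.Dataset.from_tensor_slices(tf.range(num_observations, name="sample_index"))
ds = ds.shuffle(buffer_size=2 * batch_size)
ds = ds.repeat()
ds = ds.batch(batch_size, drop_remainder=True)  # fixed-size mini-batches
ds = ds.map(tf.sort)                            # sorted sample indices per batch

for idx in ds.take(2):
    print(idx.numpy())  # e.g. [ 1  3  7 12 15 19]
```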
- data_set = data_set.map(fetch_fn, num_parallel_calls=pkg_constants.TF_NUM_THREADS) - data_set = data_set.prefetch(buffer_size) - - def map_sparse(idx, data_batch): - X_tensor_ls, design_loc_tensor, design_scale_tensor, size_factors_tensor = data_batch - if len(X_tensor_ls) > 1: - X_tensor = tf.SparseTensor(X_tensor_ls[0], X_tensor_ls[1], X_tensor_ls[2]) - X_tensor = tf.cast(X_tensor, dtype=dtype) - else: - X_tensor = X_tensor_ls[0] - return idx, (X_tensor, design_loc_tensor, design_scale_tensor, size_factors_tensor) - - data_set = data_set.map(map_sparse, num_parallel_calls=pkg_constants.TF_NUM_THREADS) - iterator = tf.compat.v1.data.make_one_shot_iterator(data_set) - - batch_sample_index, batch_data = iterator.get_next() - - with tf.name_scope("reducible_tensors_train"): - reducibles_train = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=batch_sample_index, - data_set=None, - data_batch=batch_data, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=train_a, - compute_b=train_b, - compute_jac=True, - compute_hessian=compute_hessian, - compute_fim=compute_fim, - compute_ll=False - ) - - self.neg_jac_train = reducibles_train.neg_jac_train - self.jac = reducibles_train.jac - self.neg_jac_a = reducibles_train.neg_jac_a - self.neg_jac_b = reducibles_train.neg_jac_b - self.jac_b = reducibles_train.jac_b - - self.hessians = reducibles_train.hessian - self.neg_hessians_train = reducibles_train.neg_hessian_train - - self.fim_a = reducibles_train.fim_a - self.fim_b = reducibles_train.fim_b - - self.train_set = reducibles_train.set - - with tf.name_scope("reducible_tensors_eval"): - reducibles_eval = ReducibleTensors( - model_vars=model_vars, - noise_model=noise_model, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - sample_indices=batch_sample_index, - data_set=None, - data_batch=batch_data, - mode_jac=pkg_constants.JACOBIAN_MODE, - mode_hessian=pkg_constants.HESSIAN_MODE, - mode_fim=pkg_constants.FIM_MODE, - compute_a=True, - compute_b=True, - compute_jac=True, - compute_hessian=False, - compute_fim=False, - compute_ll=True - ) - - self.log_likelihood = reducibles_eval.ll - self.norm_log_likelihood = self.log_likelihood / num_observations - self.norm_neg_log_likelihood = -self.norm_log_likelihood - self.loss = tf.reduce_sum(self.norm_neg_log_likelihood) - - self.neg_jac_train_eval = reducibles_train.neg_jac_train - - self.eval_set = reducibles_eval.set - - self.num_observations = num_observations - self.idx_train_loc = model_vars.idx_train_loc if train_a else np.array([]) - self.idx_train_scale = model_vars.idx_train_scale if train_b else np.array([]) - self.idx_train = np.sort(np.concatenate([self.idx_train_loc, self.idx_train_scale])) - - -class EstimatorGraphAll(EstimatorGraphGLM): - """ - - Contains model_vars, full_data_model and batched_data_model which are the - primary training objects. All three also exist as *_eval which can be used - to perform and iterative optmization within a single parameter update, such - as during a line search. 
- """ - - mu: tf.Tensor - sigma2: tf.Tensor - - def __init__( - self, - fetch_fn, - feature_isnonzero, - num_observations, - num_features, - num_design_loc_params, - num_design_scale_params, - num_loc_params, - num_scale_params, - constraints_loc: np.ndarray, - constraints_scale: np.ndarray, - graph: tf.Graph, - batch_size: int, - init_a, - init_b, - train_loc: bool, - train_scale: bool, - provide_optimizers: Union[dict, None], - provide_batched: bool, - provide_hessian: bool, - provide_fim: bool, - extended_summary: bool, - noise_model: str, - dtype: str - ): - """ - - :param fetch_fn: - TODO - :param feature_isnonzero: - Whether all observations of a feature are zero. Features for which this - is the case are not fitted. - :param num_observations: int - Number of observations. - :param num_features: int - Number of features. - :param num_design_loc_params: int - Number of parameters per feature in mean model. - :param num_design_scale_params: int - Number of parameters per feature in scale model. - :param graph: tf1.Graph - :param batch_size: int - Size of mini-batches used. - :param init_a: nd.array (mean model size x features) - Initialisation for all parameters of mean model. - :param init_b: nd.array (dispersion model size x features) - Initialisation for all parameters of dispersion model. - :param constraints_loc: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the mean model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param constraints_scale: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This tensor describes this relation for the dispersion model. - This form of constraints is used in vector generalized linear models (VGLMs). - :param train_loc: bool - Whether to train mean model. If False, the initialisation is kept. - :param train_scale: bool - Whether to train dispersion model. If False, the initialisation is kept. - :param provide_optimizers: - :param extended_summary: - :param dtype: Precision used in tensorflow. 
- """ - if noise_model == "nb": - from .external_nb import ModelVars - elif noise_model == "norm": - from .external_norm import ModelVars - elif noise_model == "beta": - from .external_beta import ModelVars - else: - raise ValueError("noise model not recognized") - self.noise_model = noise_model - - EstimatorGraphGLM.__init__( - self=self, - num_observations=num_observations, - num_features=num_features, - num_design_loc_params=num_design_loc_params, - num_design_scale_params=num_design_scale_params, - num_loc_params=num_loc_params, - num_scale_params=num_scale_params, - graph=graph, - batch_size=batch_size, - constraints_loc=constraints_loc, - constraints_scale=constraints_scale, - dtype=dtype - ) - - # initial graph elements - with self.graph.as_default(): - - logger.debug("building models variables") - with tf.name_scope("model_vars"): - self.model_vars = ModelVars( - dtype=dtype, - init_a=init_a, - init_b=init_b, - constraints_loc=self.constraints_loc, - constraints_scale=self.constraints_scale - ) - - # ### performance related settings - buffer_size = 4 - - with tf.name_scope("batched_data"): - logger.debug("building batched data model") - if provide_batched: - self.batched_data_model = BatchedDataModelGraph( - num_observations=self.num_observations, - fetch_fn=fetch_fn, - batch_size=batch_size, - buffer_size=buffer_size, - model_vars=self.model_vars, - constraints_loc=self.constraints_loc, - constraints_scale=self.constraints_scale, - noise_model=noise_model, - train_a=train_loc, - train_b=train_scale, - compute_fim=provide_fim, - compute_hessian=provide_hessian, - dtype=dtype - ) - else: - self.batched_data_model = None - - with tf.name_scope("full_data"): - logger.debug("building full data model") - # ### alternative definitions for custom observations: - sample_selection = tf.compat.v1.placeholder_with_default( - tf.range(num_observations), - shape=(None,), - name="sample_selection" - ) - self.full_data_model = FullDataModelGraph( - num_observations=self.num_observations, - sample_indices=sample_selection, - fetch_fn=fetch_fn, - batch_size=batch_size, - model_vars=self.model_vars, - constraints_loc=self.constraints_loc, - constraints_scale=self.constraints_scale, - noise_model=noise_model, - train_a=train_loc, - train_b=train_scale, - compute_fim=provide_fim, - compute_hessian=provide_hessian, - dtype=dtype - ) - - logger.debug("building trainers") - self._run_trainer_init( - provide_optimizers=provide_optimizers, - train_loc=train_loc, - train_scale=train_scale, - dtype=dtype - ) - - # Define output metrics: - logger.debug("building outputs") - self._set_out_var( - feature_isnonzero=feature_isnonzero, - dtype=dtype - ) - self.loss = self.full_data_model.loss_final - self.log_likelihood = self.full_data_model.log_likelihood_final - self.hessian = self.full_data_model.hessians_final - self.fisher_inv = tf.linalg.inv(-self.full_data_model.hessians_final) # TODO switch for fim? 
- # Summary statistics on feature-wise model gradients: - self.gradients = tf.reduce_sum(tf.abs(self.full_data_model.neg_jac_final / num_observations), axis=1) - - with tf.name_scope('summaries'): - if extended_summary: - tf.summary.histogram('a_var', self.model_vars.a_var) - tf.summary.histogram('b_var', self.model_vars.b_var) - tf.summary.scalar('loss', self.full_data_model.loss) - tf.summary.scalar('learning_rate', self.learning_rate) - - self.saver = tf.compat.v1.train.Saver() - self.merged_summary = tf.compat.v1.summary.merge_all() diff --git a/batchglm/train/tf1/base_glm_all/external.py b/batchglm/train/tf1/base_glm_all/external.py deleted file mode 100644 index 0682a071..00000000 --- a/batchglm/train/tf1/base_glm_all/external.py +++ /dev/null @@ -1,12 +0,0 @@ -import batchglm.data as data_utils - -import batchglm.train.tf1.train as train_utils -from batchglm.train.tf1.base import TFEstimatorGraph, _TFEstimator -from batchglm.train.tf1.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BatchedDataModelGraphGLM, BasicModelGraphGLM -from batchglm.train.tf1.base_glm import ProcessModelGLM, ModelVarsGLM, FIMGLM, HessiansGLM, JacobiansGLM, ReducableTensorsGLM - -from batchglm.models.base_glm import InputDataGLM, _ModelGLM, _EstimatorGLM - -import batchglm.train.tf1.ops as op_utils -from batchglm.utils.linalg import groupwise_solve_lm -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/base_glm_all/external_beta.py b/batchglm/train/tf1/base_glm_all/external_beta.py deleted file mode 100644 index 93cdd974..00000000 --- a/batchglm/train/tf1/base_glm_all/external_beta.py +++ /dev/null @@ -1,6 +0,0 @@ -from batchglm.train.tf1.glm_beta import EstimatorGraph -from batchglm.train.tf1.glm_beta import BasicModelGraph, ModelVars, ProcessModel -from batchglm.train.tf1.glm_beta import Hessians, FIM, Jacobians, ReducibleTensors - -from batchglm.models.glm_beta import InputDataGLM, Model -from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/external_nb.py b/batchglm/train/tf1/base_glm_all/external_nb.py deleted file mode 100644 index 97ec6bbe..00000000 --- a/batchglm/train/tf1/base_glm_all/external_nb.py +++ /dev/null @@ -1,6 +0,0 @@ -from batchglm.train.tf1.glm_nb import EstimatorGraph -from batchglm.train.tf1.glm_nb import BasicModelGraph, ModelVars, ProcessModel -from batchglm.train.tf1.glm_nb import Hessians, FIM, Jacobians, ReducibleTensors - -from batchglm.models.glm_nb import InputDataGLM, Model -from batchglm.models.glm_nb.utils import closedform_nb_glm_logmu, closedform_nb_glm_logphi \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/external_norm.py b/batchglm/train/tf1/base_glm_all/external_norm.py deleted file mode 100644 index 3b577f14..00000000 --- a/batchglm/train/tf1/base_glm_all/external_norm.py +++ /dev/null @@ -1,6 +0,0 @@ -from batchglm.train.tf1.glm_norm import EstimatorGraph -from batchglm.train.tf1.glm_norm import BasicModelGraph, ModelVars, ProcessModel -from batchglm.train.tf1.glm_norm import Hessians, FIM, Jacobians, ReducibleTensors - -from batchglm.models.glm_norm import InputDataGLM, Model -from batchglm.models.glm_norm.utils import closedform_norm_glm_mean, closedform_norm_glm_logsd \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/fim.py b/batchglm/train/tf1/base_glm_all/fim.py deleted file mode 100644 index 2126c0f3..00000000 --- 
a/batchglm/train/tf1/base_glm_all/fim.py +++ /dev/null @@ -1,115 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import FIMGLM - -logger = logging.getLogger(__name__) - - -class FIMGLMALL(FIMGLM): - """ - Compute the iteratively re-weighted least squares (IWLS or IRLS) - parameter updates for a negative binomial GLM. - """ - - def fim_a_analytic( - self, - model - ): - """ - Compute the closed-form of the base_glm_all model hessian - by evaluating its terms grouped by observations. - - Has three sub-functions which built the specific blocks of the hessian - and one sub-function which concatenates the blocks into a full hessian. - """ - - def _a_byobs(model): - """ - Compute the mean model diagonal block of the - closed form hessian of base_glm_all model by observation across features - for a batch of observations. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param model_loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param model_scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - """ - W = self._weight_fim_aa( # [observations x features] - loc=model.model_loc, - scale=model.model_scale - ) - # The computation of the hessian block requires two outer products between - # feature-wise constants and the coefficient wise design matrix entries, for each observation. - # The resulting tensor is observations x features x coefficients x coefficients which - # is too large too store in memory in most cases. However, the full 4D tensor is never - # actually needed but only its marginal across features, the final hessian block shape. - # Here, we use the einsum to efficiently perform the two outer products and the marginalisation. - if self.constraints_loc is not None: - XH = tf.matmul(model.design_loc, self.constraints_loc) - else: - XH = model.design_loc - - fim = tf.einsum('ofc,od->fcd', - tf.einsum('of,oc->ofc', W, XH), - XH) - return fim - - if self.compute_fim_a: - fim_a = _a_byobs(model=model) - else: - fim_a = tf.zeros((), dtype=self.dtype) - - return fim_a - - def fim_b_analytic( - self, - model - ): - """ - Compute the closed-form of the base_glm_all model hessian - by evaluating its terms grouped by observations. - - Has three sub-functions which built the specific blocks of the hessian - and one sub-function which concatenates the blocks into a full hessian. - """ - - def _b_byobs(model): - """ - Compute the dispersion model diagonal block of the - closed form hessian of base_glm_all model by observation across features. - """ - W = self._weight_fim_bb( # [observations=1 x features] - loc=model.model_loc, - scale=model.model_scale - ) - # The computation of the hessian block requires two outer products between - # feature-wise constants and the coefficient wise design matrix entries, for each observation. - # The resulting tensor is observations x features x coefficients x coefficients which - # is too large too store in memory in most cases. However, the full 4D tensor is never - # actually needed but only its marginal across features, the final hessian block shape. - # Here, we use the Einstein summation to efficiently perform the two outer products and the marginalisation. 
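The nested einsum referred to in these comments contracts over observations without ever materialising the observations x features x coefficients x coefficients tensor. A small NumPy check of the identity (random toy shapes, purely illustrative):

import numpy as np

rng = np.random.default_rng(0)
n_obs, n_features, n_coefs = 5, 3, 2
W = rng.normal(size=(n_obs, n_features))   # per-observation, per-feature weights
X = rng.normal(size=(n_obs, n_coefs))      # design matrix (after constraints)

# one-shot contraction: result[f, c, d] = sum_o W[o, f] * X[o, c] * X[o, d]
fim = np.einsum('ofc,od->fcd', np.einsum('of,oc->ofc', W, X), X)

# reference: explicit sum of weighted outer products per feature
ref = np.zeros((n_features, n_coefs, n_coefs))
for o in range(n_obs):
    for f in range(n_features):
        ref[f] += W[o, f] * np.outer(X[o], X[o])

assert np.allclose(fim, ref)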
- if self.constraints_scale is not None: - XH = tf.matmul(model.design_scale, self.constraints_scale) - else: - XH = model.design_scale - - fim = tf.einsum('ofc,od->fcd', - tf.einsum('of,oc->ofc', W, XH), - XH) - return fim - - # The full fisher information matrix is block-diagonal with the cross-model - # blocks all zero. Accordingly, mean and dispersion model updates can be - # treated independently and the full fisher information matrix is never required. - # Here, the non-zero model-wise diagonal blocks are computed and returned - # as a dictionary. The according score function vectors are also returned as a dictionary. - if self.compute_fim_b: - fim_b = _b_byobs(model=model) - else: - fim_b = tf.zeros((), dtype=self.dtype) - - return fim_b diff --git a/batchglm/train/tf1/base_glm_all/hessians.py b/batchglm/train/tf1/base_glm_all/hessians.py deleted file mode 100644 index f6fcba18..00000000 --- a/batchglm/train/tf1/base_glm_all/hessians.py +++ /dev/null @@ -1,193 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import pkg_constants -from .external import HessiansGLM - -logger = logging.getLogger(__name__) - - -class HessianGLMALL(HessiansGLM): - """ - Compute the Hessian matrix for a GLM by gene using gradients from tensorflow. - """ - - def hessian_analytic( - self, - model - ) -> tf.Tensor: - """ - Compute the closed-form of the base_glm_all model hessian - by evaluating its terms grouped by observations. - - Has three sub-functions which built the specific blocks of the hessian - and one sub-function which concatenates the blocks into a full hessian. - """ - - def _aa_byobs_batched(model): - """ - Compute the mean model diagonal block of the - closed form hessian of base_glm_all model by observation across features - for a batch of observations. - """ - W = self._weight_hessian_aa( # [observations x features] - X=model.X, - loc=model.model_loc, - scale=model.model_scale, - ) - # The computation of the hessian block requires two outer products between - # feature-wise constants and the coefficient wise design matrix entries, for each observation. - # The resulting tensor is observations x features x coefficients x coefficients which - # is too large too store in memory in most cases. However, the full 4D tensor is never - # actually needed but only its marginal across features, the final hessian block shape. - # Here, we use the einsum to efficiently perform the two outer products and the marginalisation. - if self.constraints_loc is not None: - XH = tf.matmul(model.design_loc, model.constraints_loc) - else: - XH = model.design_loc - - Hblock = tf.einsum('ofc,od->fcd', - tf.einsum('of,oc->ofc', W, XH), - XH) - return Hblock - - def _bb_byobs_batched(model): - """ - Compute the dispersion model diagonal block of the - closed form hessian of base_glm_all model by observation across features. - """ - W = self._weight_hessian_bb( # [observations=1 x features] - X=model.X, - loc=model.model_loc, - scale=model.model_scale, - ) - # The computation of the hessian block requires two outer products between - # feature-wise constants and the coefficient wise design matrix entries, for each observation. - # The resulting tensor is observations x features x coefficients x coefficients which - # is too large too store in memory in most cases. However, the full 4D tensor is never - # actually needed but only its marginal across features, the final hessian block shape. - # Here, we use the Einstein summation to efficiently perform the two outer products and the marginalisation. 
- if self.constraints_scale is not None: - XH = tf.matmul(model.design_scale, model.constraints_scale) - else: - XH = model.design_scale - - Hblock = tf.einsum('ofc,od->fcd', - tf.einsum('of,oc->ofc', W, XH), - XH) - return Hblock - - def _ab_byobs_batched(model): - """ - Compute the mean-dispersion model off-diagonal block of the - closed form hessian of base_glm_all model by observastion across features. - - Note that there are two blocks of the same size which can - be compute from each other with a transpose operation as - the hessian is symmetric. - """ - W = self._weight_hessian_ab( # [observations=1 x features] - X=model.X, - loc=model.model_loc, - scale=model.model_scale, - ) - # The computation of the hessian block requires two outer products between - # feature-wise constants and the coefficient wise design matrix entries, for each observation. - # The resulting tensor is observations x features x coefficients x coefficients which - # is too large too store in memory in most cases. However, the full 4D tensor is never - # actually needed but only its marginal across features, the final hessian block shape. - # Here, we use the Einstein summation to efficiently perform the two outer products and the marginalisation. - if self.constraints_loc is not None: - XHloc = tf.matmul(model.design_loc, model.constraints_loc) - else: - XHloc = model.design_loc - - if self.constraints_scale is not None: - XHscale = tf.matmul(model.design_scale, model.constraints_scale) - else: - XHscale = model.design_scale - - Hblock = tf.einsum('ofc,od->fcd', - tf.einsum('of,oc->ofc', W, XHloc), - XHscale) - return Hblock - - if self.compute_a and self.compute_b: - H_aa = _aa_byobs_batched(model=model) - H_bb = _bb_byobs_batched(model=model) - H_ab = _ab_byobs_batched(model=model) - H_ba = tf.transpose(H_ab, perm=[0, 2, 1]) - H = tf.concat( - [tf.concat([H_aa, H_ab], axis=2), - tf.concat([H_ba, H_bb], axis=2)], - axis=1 - ) - elif self.compute_a and not self.compute_b: - H = _aa_byobs_batched(model=model) - elif not self.compute_a and self.compute_b: - H = _bb_byobs_batched(model=model) - else: - H = tf.zeros((), dtype=self.dtype) - - return H - - def hessian_tf( - self, - model - ) -> tf.Tensor: - """ - Compute hessians via tf1.gradients for all gene-wise in parallel. - """ - if self.compute_a and self.compute_b: - var_shape = tf.shape(self.model_vars.params) - var = self.model_vars.params - elif self.compute_a and not self.compute_b: - var_shape = tf.shape(self.model_vars.a_var) - var = self.model_vars.a_var - elif not self.compute_a and self.compute_b: - var_shape = tf.shape(self.model_vars.b_var) - var = self.model_vars.b_var - - if self.compute_a or self.compute_b: - # Compute first order derivatives as first step to get second order derivatives. - first_der = tf.gradients(model.log_likelihood, var)[0] - - # Note on error comment below: The arguments that cause the error, infer_shape and element_shape, - # are not necessary for this code but would provide an extra layer of stability as all - # elements of the array have the same shape. - loop_vars = [ - tf.constant(0, tf.int32), # iteration counter - tf.TensorArray( # hessian slices [:,:,j] - dtype=var.dtype, - size=var_shape[0], - clear_after_read=False - #infer_shape=True, # TODO tf1>=2.0: this causes error related to eager execution in tf1.12 - #element_shape=var_shape - ) - ] - - # Compute second order derivatives based on parameter-wise slices of the tensor of first order derivatives. 
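The while loop that follows builds the Hessian one parameter slice at a time by re-differentiating the gradient. In eager TensorFlow 2 the same idea is a nested GradientTape; a toy quadratic objective stands in for the GLM log-likelihood here:

import tensorflow as tf

params = tf.Variable([0.5, -1.0, 2.0])

with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        # stand-in objective; the removed code differentiates model.log_likelihood
        ll = -tf.reduce_sum(params ** 2) + params[0] * params[1]
    grad = inner_tape.gradient(ll, params)       # first-order derivatives
hessian = outer_tape.jacobian(grad, params)      # second-order derivatives, shape (3, 3)
print(hessian.numpy())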
- _, h_tensor_array = tf.while_loop( - cond=lambda i, _: i < var_shape[0], - body=lambda i, result: ( - i + 1, - result.write( - index=i, - value=tf.gradients(first_der[i, :], var)[0] - ) - ), - loop_vars=loop_vars, - return_same_structure=True - ) - - # h_tensor_array is a TensorArray, reshape this into a tensor so that it can be used - # in down-stream computation graphs. - h = tf.transpose(tf.reshape( - h_tensor_array.stack(), - tf.stack((var_shape[0], var_shape[0], var_shape[1])) - ), perm=[2, 1, 0]) - else: - h = tf.zeros((), dtype=self.dtype) - - return h \ No newline at end of file diff --git a/batchglm/train/tf1/base_glm_all/jacobians.py b/batchglm/train/tf1/base_glm_all/jacobians.py deleted file mode 100644 index b7be31e7..00000000 --- a/batchglm/train/tf1/base_glm_all/jacobians.py +++ /dev/null @@ -1,103 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import JacobiansGLM - -logger = logging.getLogger(__name__) - - -class JacobiansGLMALL(JacobiansGLM): - """ - Compute the Jacobian matrix for a GLM using gradients from tensorflow. - """ - - def jac_analytic( - self, - model - ) -> tf.Tensor: - """ - Compute the closed-form of the base_glm_all model jacobian - by evalutating its terms grouped by observations. - """ - - def _a_byobs(X, design_loc, loc, scale): - """ - Compute the mean model block of the jacobian. - - :param X: tf1.tensor observations x features - Observation by observation and feature. - :param model_loc: tf1.tensor observations x features - Value of mean model by observation and feature. - :param model_scale: tf1.tensor observations x features - Value of dispersion model by observation and feature. - :return Jblock: tf1.tensor features x coefficients - Block of jacobian. - """ - W = self._weights_jac_a(X=X, loc=loc, scale=scale) # [observations, features] - if self.constraints_loc is not None: - XH = tf.matmul(design_loc, self.constraints_loc) - else: - XH = design_loc - - Jblock = tf.matmul(tf.transpose(W), XH) # [features, coefficients] - return Jblock - - def _b_byobs(X, design_scale, loc, scale): - """ - Compute the dispersion model block of the jacobian. - """ - W = self._weights_jac_b(X=X, loc=loc, scale=scale) # [observations, features] - if self.constraints_scale is not None: - XH = tf.matmul(design_scale, self.constraints_scale) - else: - XH = design_scale - - Jblock = tf.matmul(tf.transpose(W), XH) # [features, coefficients] - return Jblock - - if self.compute_a and self.compute_b: - J_a = _a_byobs(X=model.X, design_loc=model.design_loc, loc=model.model_loc, scale=model.model_scale) - J_b = _b_byobs(X=model.X, design_scale=model.design_scale, loc=model.model_loc, scale=model.model_scale) - J = tf.concat([J_a, J_b], axis=1) - elif self.compute_a and not self.compute_b: - J = _a_byobs(X=model.X, design_loc=model.design_loc, loc=model.model_loc, scale=model.model_scale) - elif not self.compute_a and self.compute_b: - J = _b_byobs(X=model.X, design_scale=model.design_scale, loc=model.model_loc, scale=model.model_scale) - else: - J = tf.zeros((), dtype=self.dtype) - - return J - - def jac_tf( - self, - model - ) -> tf.Tensor: - """ - Compute the Jacobian matrix for a GLM using gradients from tensorflow. 
- """ - def _jac(): - J = tf.gradients(model.log_likelihood, self.model_vars.params)[0] - J = tf.transpose(J) - return J - - def _jac_a(): - J_a = tf.gradients(model.log_likelihood, self.model_vars.a_var)[0] - J_a = tf.transpose(J_a) - return J_a - - def _jac_b(): - J_b = tf.gradients(model.log_likelihood, self.model_vars.b_var)[0] - J_b = tf.transpose(J_b) - return J_b - - if self.compute_a and self.compute_b: - J = _jac() - elif self.compute_a and not self.compute_b: - J = _jac_a() - elif not self.compute_a and self.compute_b: - J = _jac_b() - else: - J = tf.zeros((), dtype=self.dtype) - - return J diff --git a/batchglm/train/tf1/base_glm_all/reducible_tensors.py b/batchglm/train/tf1/base_glm_all/reducible_tensors.py deleted file mode 100644 index 2263c0a2..00000000 --- a/batchglm/train/tf1/base_glm_all/reducible_tensors.py +++ /dev/null @@ -1,99 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import ReducableTensorsGLM - -logger = logging.getLogger("batchglm") - - -class ReducableTensorsGLMALL(ReducableTensorsGLM): - """ - """ - - def assemble_tensors(self, idx, data): - """ - Assemble jacobian of a batch of observations across all features. - - This function runs the data batch (an observation) through the - model graph and calls the wrappers that compute the - individual closed forms of the jacobian. - - :param idx: Indices of observations. - :param data: tuple - Containing the following parameters: - - X: tf1.tensor observations x features - Observation by observation and feature. - - size_factors: tf1.tensor observations x features - Model size factors by observation and feature. - - params: tf1.tensor features x coefficients - Estimated model variables. - :return J: tf1.tensor features x coefficients - Jacobian evaluated on a single observation, provided in data. 
- """ - if self.noise_model == "nb": - from .external_nb import BasicModelGraph - elif self.noise_model == "norm": - from .external_norm import BasicModelGraph - elif self.noise_model == "beta": - from .external_beta import BasicModelGraph - else: - raise ValueError("noise model %s was not recognized" % self.noise_model) - - X, design_loc, design_scale, size_factors = data - - model = BasicModelGraph( - X=X, - design_loc=design_loc, - design_scale=design_scale, - constraints_loc=self.constraints_loc, - constraints_scale=self.constraints_scale, - a_var=self.model_vars.a_var, - b_var=self.model_vars.b_var, - dtype=self.model_vars.dtype, - size_factors=size_factors - ) - dtype = model.dtype - - if self.compute_jac: - if self.mode_jac == "analytic": - jac = self.jac_analytic(model=model) - elif self.mode_jac == "tf1": - jac = self.jac_tf(model=model) - else: - raise ValueError("mode_jac %s not recognized" % self.mode_jac) - else: - jac = tf.zeros((), dtype=dtype) - - if self.compute_hessian: - if self.mode_hessian == "analytic": - hessian = self.hessian_analytic(model=model) - elif self.mode_hessian == "tf1": - hessian = self.hessian_tf(model=model) - else: - raise ValueError("mode_hessian %s not recognized" % self.mode_hessian) - else: - hessian = tf.zeros((), dtype=dtype) - - if self.compute_fim_a: - if self.mode_fim == "analytic": - fim_a = self.fim_a_analytic(model=model) - else: - raise ValueError("mode_fim %s not recognized" % self.mode_fim) - else: - fim_a = tf.zeros((), dtype=dtype) - - if self.compute_fim_b: - if self.mode_fim == "analytic": - fim_b = self.fim_b_analytic(model=model) - else: - raise ValueError("mode_fim %s not recognized" % self.mode_fim) - else: - fim_b = tf.zeros((), dtype=dtype) - - if self.compute_ll: - ll = model.log_likelihood - else: - ll = tf.zeros((), dtype=dtype) - - return [jac, hessian, fim_a, fim_b, ll] diff --git a/batchglm/train/tf1/external.py b/batchglm/train/tf1/external.py deleted file mode 100644 index 0b70405a..00000000 --- a/batchglm/train/tf1/external.py +++ /dev/null @@ -1 +0,0 @@ -from batchglm import pkg_constants \ No newline at end of file diff --git a/batchglm/train/tf1/glm_beta/__init__.py b/batchglm/train/tf1/glm_beta/__init__.py deleted file mode 100644 index 4db081bb..00000000 --- a/batchglm/train/tf1/glm_beta/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .estimator import Estimator -from .estimator_graph import EstimatorGraph -from .model import BasicModelGraph, ModelVars, ProcessModel -from .hessians import Hessians -from .fim import FIM -from .jacobians import Jacobians -from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf1/glm_beta/estimator.py b/batchglm/train/tf1/glm_beta/estimator.py deleted file mode 100644 index 6cd96878..00000000 --- a/batchglm/train/tf1/glm_beta/estimator.py +++ /dev/null @@ -1,291 +0,0 @@ -import logging -from typing import Union - -import numpy as np -import tensorflow as tf - -from .external import TFEstimatorGLM, InputDataGLM, Model -from .external import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize -from .estimator_graph import EstimatorGraph -from .model import ProcessModel -from .training_strategies import TrainingStrategies - - -class Estimator(TFEstimatorGLM, ProcessModel): - """ - Estimator for Generalized Linear Models (GLMs) with beta distributed noise. - Uses a logit linker function for loc and log linker function for scale. 
- """ - - def __init__( - self, - input_data: InputDataGLM, - batch_size: int = 512, - graph: tf.Graph = None, - init_model: Model = None, - init_a: Union[np.ndarray, str] = "AUTO", - init_b: Union[np.ndarray, str] = "AUTO", - quick_scale: bool = False, - model: EstimatorGraph = None, - provide_optimizers: dict = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, - "irls": False, - "irls_gd": False, - "irls_tr": False, - "irls_gd_tr": False, - }, - provide_batched: bool = False, - provide_fim: bool = False, - provide_hessian: bool = False, - optim_algos: list = [], - extended_summary=False, - dtype="float64" - ): - """ - Performs initialisation and creates a new estimator. - - :param input_data: InputData - The input data - :param batch_size: int - Size of mini-batches used. - :param graph: (optional) tf1.Graph - :param init_model: (optional) - If provided, this model will be used to initialize this Estimator. - :param init_a: (Optional) - Low-level initial values for a. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize intercept with observed mean - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'a' - :param init_b: (Optional) - Low-level initial values for b. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize with zeros - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'b' - :param quick_scale: bool - Whether `scale` will be fitted faster and maybe less accurate. - Useful in scenarios where fitting the exact `scale` is not absolutely necessary. - :param model: EstimatorGraph - EstimatorGraph to use. Basically for debugging. - :param provide_optimizers: - - E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, - "nr": False, "nr_tr": True, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - :param provide_batched: bool - Whether mini-batched optimizers should be provided. - :param provide_fim: Whether to compute fisher information matrix during training - Either supply provide_fim and provide_hessian or optim_algos. - :param provide_hessian: Whether to compute hessians during training - Either supply provide_fim and provide_hessian or optim_algos. - :param optim_algos: Algorithms that you want to use on this object. Depending on that, - the hessian and/or fisher information matrix are computed. - Either supply provide_fim and provide_hessian or optim_algos. - :param extended_summary: Include detailed information in the summaries. - Will increase runtime of summary writer, use only for debugging. - :param dtype: Precision used in tensorflow. 
- """ - self.TrainingStrategies = TrainingStrategies - - self._input_data = input_data - self._train_loc = True - self._train_scale = True - - (init_a, init_b) = self.init_par( - input_data=input_data, - init_a=init_a, - init_b=init_b, - init_model=init_model - ) - init_a = init_a.astype(dtype) - init_b = init_b.astype(dtype) - if quick_scale: - self._train_scale = False - - if len(optim_algos) > 0: - if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): - provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): - provide_fim = True - - TFEstimatorGLM.__init__( - self=self, - input_data=input_data, - batch_size=batch_size, - graph=graph, - init_a=init_a, - init_b=init_b, - model=model, - provide_optimizers=provide_optimizers, - provide_batched=provide_batched, - provide_fim=provide_fim, - provide_hessian=provide_hessian, - extended_summary=extended_summary, - noise_model="beta", - dtype=dtype - ) - - def get_model_container( - self, - input_data - ): - return Model(input_data=input_data) - - def init_par( - self, - input_data, - init_a, - init_b, - init_model - ): - r""" - standard: - Only initialise intercept and keep other coefficients as zero. - - closed-form: - Initialize with Maximum Likelihood / Maximum of Momentum estimators - - Idea: - $$ - \theta &= f(x) \\ - \Rightarrow f^{-1}(\theta) &= x \\ - &= (D \cdot D^{+}) \cdot x \\ - &= D \cdot (D^{+} \cdot x) \\ - &= D \cdot x' = f^{-1}(\theta) - $$ - """ - - if init_model is None: - groupwise_means = None - init_a_str = None - if isinstance(init_a, str): - init_a_str = init_a.lower() - # Chose option if auto was chosen - if init_a.lower() == "auto": - init_a = "closed_form" - - if init_a.lower() == "closed_form": - groupwise_means, init_a, rmsd_a = closedform_beta_glm_logitmean( - x=input_data.x, - design_loc=input_data.design_loc, - constraints_loc=input_data.constraints_loc, - size_factors=input_data.size_factors, - link_fn=lambda mean: np.log( - 1/(1/self.np_clip_param(mean, "mean")-1) - ) - ) - - # train mu, if the closed-form solution is inaccurate - self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) - - logging.getLogger("batchglm").debug("Using closed-form MME initialization for mean") - elif init_a.lower() == "standard": - overall_means = np.mean(input_data.x, axis=0) - overall_means = self.np_clip_param(overall_means, "mean") - - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - init_a[0, :] = np.log(overall_means/(1-overall_means)) - self._train_loc = True - - logging.getLogger("batchglm").debug("Using standard initialization for mean") - elif init_a.lower() == "all_zero": - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - self._train_loc = True - - logging.getLogger("batchglm").debug("Using all_zero initialization for mean") - else: - raise ValueError("init_a string %s not recognized" % init_a) - logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) - if isinstance(init_b, str): - if init_b.lower() == "auto": - init_b = "standard" - - if init_b.lower() == "standard": - groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logsamplesize( - x=input_data.x, - design_scale=input_data.design_scale[:, [0]], - constraints=input_data.constraints_scale[[0], :][:, [0]], - size_factors=input_data.size_factors, - groupwise_means=None, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) - ) - init_b = np.zeros([input_data.num_scale_params, 
input_data.num_features]) - init_b[0, :] = init_b_intercept - - logging.getLogger("batchglm").debug("Using standard-form MME initialization for dispersion") - elif init_b.lower() == "closed_form": - dmats_unequal = False - if input_data.num_design_loc_params == input_data.num_design_scale_params: - if np.any(input_data.design_loc != input_data.design_scale): - dmats_unequal = True - - inits_unequal = False - if init_a_str is not None: - if init_a_str != init_b: - inits_unequal = True - - if inits_unequal or dmats_unequal: - raise ValueError("cannot use closed_form init for scale model " + - "if scale model differs from loc model") - - groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logsamplesize( - x=input_data.x, - design_scale=input_data.design_scale, - constraints=input_data.constraints_scale, - size_factors=input_data.size_factors, - groupwise_means=groupwise_means, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) - ) - - logging.getLogger("batchglm").debug("Using closed-form MME initialization for dispersion") - elif init_b.lower() == "all_zero": - init_b = np.zeros([input_data.num_scale_params, input_data.num_features]) - - logging.getLogger("batchglm").debug("Using standard initialization for dispersion") - else: - raise ValueError("init_b string %s not recognized" % init_b) - logging.getLogger("batchglm").debug("Should train r: %s", self._train_scale) - else: - # Locations model: - if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): - my_loc_names = set(input_data.loc_names) - my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names)) - - init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) - for parm in my_loc_names: - init_idx = np.where(init_model.input_data.loc_names == parm)[0] - my_idx = np.where(input_data.loc_names == parm)[0] - init_loc[my_idx] = init_model.a_var[init_idx] - - init_a = init_loc - logging.getLogger("batchglm").debug("Using initialization based on input model for mean") - - # Scale model: - if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): - my_scale_names = set(input_data.scale_names) - my_scale_names = my_scale_names.intersection(init_model.input_data.scale_names) - - init_scale = np.zeros([input_data.num_scale_params, input_data.num_features]) - for parm in my_scale_names: - init_idx = np.where(init_model.input_data.scale_names == parm)[0] - my_idx = np.where(input_data.scale_names == parm)[0] - init_scale[my_idx] = init_model.b_var[init_idx] - - init_b = init_scale - logging.getLogger("batchglm").debug("Using initialization based on input model for dispersion") - - return init_a, init_b diff --git a/batchglm/train/tf1/glm_beta/estimator_graph.py b/batchglm/train/tf1/glm_beta/estimator_graph.py deleted file mode 100644 index 8e609600..00000000 --- a/batchglm/train/tf1/glm_beta/estimator_graph.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging - -from .model import ProcessModel -from .external import EstimatorGraphAll - -logger = logging.getLogger(__name__) - - -class EstimatorGraph(ProcessModel, EstimatorGraphAll): - """ - Full class. 
- """ diff --git a/batchglm/train/tf1/glm_beta/external.py b/batchglm/train/tf1/glm_beta/external.py deleted file mode 100644 index d24db22b..00000000 --- a/batchglm/train/tf1/glm_beta/external.py +++ /dev/null @@ -1,18 +0,0 @@ -import batchglm.data as data_utils - -from batchglm.models.glm_beta import _EstimatorGLM, InputDataGLM, Model -from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize - -import batchglm.train.tf1.ops as op_utils -import batchglm.train.tf1.train as train_utils -from batchglm.train.tf1.base import TFEstimatorGraph - -from batchglm.train.tf1.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM -from batchglm.train.tf1.base_glm import ProcessModelGLM, ModelVarsGLM -from batchglm.train.tf1.base_glm import HessiansGLM, FIMGLM, JacobiansGLM - -from batchglm.train.tf1.base_glm_all import TFEstimatorGLM, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL - -from batchglm.utils.linalg import groupwise_solve_lm -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/glm_beta/fim.py b/batchglm/train/tf1/glm_beta/fim.py deleted file mode 100644 index e23b0a94..00000000 --- a/batchglm/train/tf1/glm_beta/fim.py +++ /dev/null @@ -1,25 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import FIMGLMALL - -logger = logging.getLogger(__name__) - - -class FIM(FIMGLMALL): - # No Fisher Information Matrices due to unsolvable E[log(X)] - - def _weight_fim_aa( - self, - loc, - scale - ): - assert False, "not implemented" - - def _weight_fim_bb( - self, - loc, - scale - ): - assert False, "not implemented" diff --git a/batchglm/train/tf1/glm_beta/hessians.py b/batchglm/train/tf1/glm_beta/hessians.py deleted file mode 100644 index 73ca76e5..00000000 --- a/batchglm/train/tf1/glm_beta/hessians.py +++ /dev/null @@ -1,92 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import HessianGLMALL - -logger = logging.getLogger(__name__) - - -class Hessians(HessianGLMALL): - - def _weight_hessian_aa( - self, - X, - loc, - scale, - ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - - if isinstance(X, tf.SparseTensor): - # Using the dense matrix of the location model to serve the correct shapes for the sparse X. - const1 = tf.sparse_add(tf.zeros_like(loc), X).__div__(-tf.sparse.add(X, -tf.ones_like(loc))) - # Adding tf1.zeros_like(loc) is a hack to avoid bug thrown by log on sparse matrix below, - # to_dense does not work. - else: - const1 = tf.log(X / (tf.ones_like(X) - X)) - - const2 = (1 - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) - const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(tf.ones_like(loc), loc_times_scale) - tf.polygamma(tf.ones_like(loc), one_minus_loc_times_scale)) - const = loc * one_minus_loc_times_scale * (const2 + const3) - return const - - def _weight_hessian_ab( - self, - X, - loc, - scale, - ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - if isinstance(X, tf.SparseTensor): - # Using the dense matrix of the location model to serve the correct shapes for the sparse X. 
- const1 = tf.sparse_add(tf.zeros_like(loc), X).__div__(-tf.sparse.add(X, -tf.ones_like(loc))) - # Adding tf1.zeros_like(loc) is a hack to avoid bug thrown by log on sparse matrix below, - # to_dense does not work. - else: - const1 = tf.log(X / (1 - X)) - - const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 - const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, one_minus_loc_times_scale)) - - const = loc * one_minus_loc_times_scale * (const2 + const3) - - return const - - def _weight_hessian_bb( - self, - X, - loc, - scale, - ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - if isinstance(X, tf.SparseTensor): - # Using the dense matrix of the location model to serve the correct shapes for the sparse X. - const1 = tf.sparse_add(tf.zeros_like(loc), X).__div__(-tf.sparse.add(X, -tf.ones_like(loc))) - # Adding tf1.zeros_like(loc) is a hack to avoid bug thrown by log on sparse matrix below, - # to_dense does not work. - const2 = loc * (tf.log(tf.sparse_add(tf.zeros_like(loc), X)) - tf.digamma(loc_times_scale)) \ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ - + tf.digamma(scale) - else: - const1 = tf.log(X / (1 - X)) - const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ - + tf.digamma(scale) - const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ - + tf.polygamma(scalar_one, scale)\ - - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc)) - const = scale * (const2 + const3) - - return const - - diff --git a/batchglm/train/tf1/glm_beta/jacobians.py b/batchglm/train/tf1/glm_beta/jacobians.py deleted file mode 100644 index d599636d..00000000 --- a/batchglm/train/tf1/glm_beta/jacobians.py +++ /dev/null @@ -1,40 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import JacobiansGLMALL - -logger = logging.getLogger(__name__) - - -class Jacobians(JacobiansGLMALL): - - def _weights_jac_a( - self, - X, - loc, - scale, - ): - one_minus_loc = 1 - loc - if isinstance(X, tf.SparseTensor): - const1 = tf.log(tf.sparse_add(tf.zeros_like(loc), X).__div__(-tf.sparse.add(X, -tf.ones_like(loc)))) - else: - const1 = tf.log(X/(1-X)) - const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 - const = const2 * scale * loc * one_minus_loc - return const - - def _weights_jac_b( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - one_minus_X = - tf.sparse.add(X, -tf.ones_like(loc)) - else: - one_minus_X = 1 - X - one_minus_loc = 1 - loc - const = scale * (tf.digamma(scale) - tf.digamma(loc*scale) * loc - tf.digamma(one_minus_loc*scale) * one_minus_loc\ - + loc * tf.log(one_minus_X) + one_minus_loc * tf.log(one_minus_X)) - return const diff --git a/batchglm/train/tf1/glm_beta/model.py b/batchglm/train/tf1/glm_beta/model.py deleted file mode 100644 index 477747c3..00000000 --- a/batchglm/train/tf1/glm_beta/model.py +++ /dev/null @@ -1,133 +0,0 @@ -import logging - -import tensorflow as tf - -import numpy as np - -from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class ProcessModel(ProcessModelGLM): - - def param_bounds( - self, - dtype - ): - if isinstance(dtype, tf.DType): - 
dmin = dtype.min - dmax = dtype.max - dtype = dtype.as_numpy_dtype - else: - dtype = np.dtype(dtype) - dmin = np.finfo(dtype).min - dmax = np.finfo(dtype).max - dtype = dtype.type - - zero = np.nextafter(0, np.inf, dtype=dtype) - one = np.nextafter(1, -np.inf, dtype=dtype) - - sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) - bounds_min = { - "a_var": np.log(zero/(1-zero)) / sf, - "b_var": np.log(zero) / sf, - "eta_loc": np.log(zero/(1-zero)) / sf, - "eta_scale": np.log(zero) / sf, - "mean": np.nextafter(0, np.inf, dtype=dtype), - "samplesize": np.nextafter(0, np.inf, dtype=dtype), - "probs": dtype(0), - "log_probs": np.log(zero), - } - bounds_max = { - "a_var": np.log(one/(1-one)) / sf, - "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_loc": np.log(one/(1-one)) / sf, - "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mean": one, - "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), - } - return bounds_min, bounds_max - - -class ModelVars(ProcessModel, ModelVarsGLM): - """ - Full class. - """ - - -class BasicModelGraph(ProcessModel, BasicModelGraphGLM): - - def __init__( - self, - X, - design_loc, - design_scale, - constraints_loc, - constraints_scale, - a_var, - b_var, - dtype, - size_factors=None - ): - a_var = self.tf_clip_param(a_var, "a_var") - b_var = self.tf_clip_param(b_var, "b_var") - - if constraints_loc is not None: - eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) - else: - eta_loc = tf.matmul(design_loc, a_var) - - eta_loc = self.tf_clip_param(eta_loc, "eta_loc") - - if constraints_scale is not None: - eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) - else: - eta_scale = tf.matmul(design_scale, b_var) - - eta_scale = self.tf_clip_param(eta_scale, "eta_scale") - - # Inverse linker functions: - model_loc = 1/(1+tf.exp(-eta_loc)) - model_scale = tf.exp(eta_scale) - - # Log-likelihood: - if isinstance(X, tf.SparseTensor): - one_minus_X = -tf.sparse.add(X, -tf.ones_like(model_loc)) - else: - one_minus_X = 1 - X - - one_minus_loc = 1 - model_loc - log_probs = tf.math.lgamma(model_scale) - tf.math.lgamma(model_loc * model_scale)\ - - tf.math.lgamma(one_minus_loc * model_scale)\ - + (model_scale * model_loc - 1) * tf.math.log(one_minus_X)\ - + (one_minus_loc * model_scale - 1) * tf.math.log(one_minus_X) - - log_probs = self.tf_clip_param(log_probs, "log_probs") - - # Variance: - sigma2 = (model_loc * one_minus_loc) / (1 + model_scale) - - self.X = X - self.design_loc = design_loc - self.design_scale = design_scale - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - self.a_var = a_var - self.b_var = b_var - self.size_factors = size_factors - self.dtype = dtype - - self.eta_loc = eta_loc - self.eta_scale = eta_scale - self.model_loc = model_loc - self.model_scale = model_scale - self.mean = model_loc - self.samplesize = model_scale - - self.log_probs = log_probs - - self.sigma2 = sigma2 \ No newline at end of file diff --git a/batchglm/train/tf1/glm_beta/reducible_tensors.py b/batchglm/train/tf1/glm_beta/reducible_tensors.py deleted file mode 100644 index a89103ea..00000000 --- a/batchglm/train/tf1/glm_beta/reducible_tensors.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -from .external import ReducableTensorsGLMALL -from .hessians import Hessians -from .jacobians import Jacobians -from .fim import FIM - -logger = logging.getLogger(__name__) - - -class ReducibleTensors(Jacobians, Hessians, 
FIM, ReducableTensorsGLMALL): - """ - """ diff --git a/batchglm/train/tf1/glm_beta/training_strategies.py b/batchglm/train/tf1/glm_beta/training_strategies.py deleted file mode 100644 index 9bd8b271..00000000 --- a/batchglm/train/tf1/glm_beta/training_strategies.py +++ /dev/null @@ -1,37 +0,0 @@ -from enum import Enum - -class TrainingStrategies(Enum): - - AUTO = None - DEFAULT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - INEXACT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-6, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - EXACT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - IRLS = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "irls_tr", - }, - ] \ No newline at end of file diff --git a/batchglm/train/tf1/glm_nb/__init__.py b/batchglm/train/tf1/glm_nb/__init__.py deleted file mode 100644 index 4db081bb..00000000 --- a/batchglm/train/tf1/glm_nb/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .estimator import Estimator -from .estimator_graph import EstimatorGraph -from .model import BasicModelGraph, ModelVars, ProcessModel -from .hessians import Hessians -from .fim import FIM -from .jacobians import Jacobians -from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf1/glm_nb/estimator.py b/batchglm/train/tf1/glm_nb/estimator.py deleted file mode 100644 index 65f0c592..00000000 --- a/batchglm/train/tf1/glm_nb/estimator.py +++ /dev/null @@ -1,152 +0,0 @@ -import logging -from typing import Union - -import numpy as np -try: - import tensorflow as tf -except: - tf = None - -from .external import TFEstimatorGLM, InputDataGLM, Model -from .external import init_par -from .estimator_graph import EstimatorGraph -from .model import ProcessModel -from .training_strategies import TrainingStrategies - - -class Estimator(TFEstimatorGLM, ProcessModel): - """ - Estimator for Generalized Linear Models (GLMs) with negative binomial noise. - Uses the natural logarithm as linker function. - """ - - def __init__( - self, - input_data: InputDataGLM, - batch_size: int = 512, - graph: tf.Graph = None, - init_model: Model = None, - init_a: Union[np.ndarray, str] = "AUTO", - init_b: Union[np.ndarray, str] = "AUTO", - quick_scale: bool = False, - model: EstimatorGraph = None, - provide_optimizers: dict = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, - "irls": True, - "irls_gd": True, - "irls_tr": True, - "irls_gd_tr": True, - }, - provide_batched: bool = False, - provide_fim: bool = False, - provide_hessian: bool = False, - optim_algos: list = [], - extended_summary=False, - dtype="float64", - **kwargs - ): - """ - Performs initialisation and creates a new estimator. - - :param input_data: InputData - The input data - :param batch_size: int - Size of mini-batches used. - :param graph: (optional) tf1.Graph - :param init_model: (optional) - If provided, this model will be used to initialize this Estimator. - :param init_a: (Optional) - Low-level initial values for a. 
Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize intercept with observed mean - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'a' - :param init_b: (Optional) - Low-level initial values for b. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize with zeros - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'b' - :param quick_scale: bool - Whether `scale` will be fitted faster and maybe less accurate. - Useful in scenarios where fitting the exact `scale` is not absolutely necessary. - :param model: EstimatorGraph - EstimatorGraph to use. Basically for debugging. - :param provide_optimizers: - - E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, - "nr": False, "nr_tr": True, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - :param provide_batched: bool - Whether mini-batched optimizers should be provided. - :param provide_fim: Whether to compute fisher information matrix during training - Either supply provide_fim and provide_hessian or optim_algos. - :param provide_hessian: Whether to compute hessians during training - Either supply provide_fim and provide_hessian or optim_algos. - :param optim_algos: Algorithms that you want to use on this object. Depending on that, - the hessian and/or fisher information matrix are computed. - Either supply provide_fim and provide_hessian or optim_algos. - :param extended_summary: Include detailed information in the summaries. - Will increase runtime of summary writer, use only for debugging. - :param dtype: Precision used in tensorflow. - """ - if tf is None: - raise ValueError("tensorflow could not be imported." 
+ - "Install tensorflow to use Estimators from the tf1 submodule") - self.TrainingStrategies = TrainingStrategies - - self._input_data = input_data - init_a, init_b, train_loc, train_scale = init_par( - input_data=input_data, - init_a=init_a, - init_b=init_b, - init_model=None - ) - self._train_loc = train_loc - self._train_scale = train_scale - init_a = init_a.astype(dtype) - init_b = init_b.astype(dtype) - if quick_scale: - self._train_scale = False - - if len(optim_algos) > 0: - if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): - provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): - provide_fim = True - - TFEstimatorGLM.__init__( - self=self, - input_data=input_data, - batch_size=batch_size, - graph=graph, - init_a=init_a, - init_b=init_b, - model=model, - provide_optimizers=provide_optimizers, - provide_batched=provide_batched, - provide_fim=provide_fim, - provide_hessian=provide_hessian, - extended_summary=extended_summary, - noise_model="nb", - dtype=dtype - ) - - def get_model_container( - self, - input_data - ): - return Model(input_data=input_data) diff --git a/batchglm/train/tf1/glm_nb/estimator_graph.py b/batchglm/train/tf1/glm_nb/estimator_graph.py deleted file mode 100644 index 8e609600..00000000 --- a/batchglm/train/tf1/glm_nb/estimator_graph.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging - -from .model import ProcessModel -from .external import EstimatorGraphAll - -logger = logging.getLogger(__name__) - - -class EstimatorGraph(ProcessModel, EstimatorGraphAll): - """ - Full class. - """ diff --git a/batchglm/train/tf1/glm_nb/external.py b/batchglm/train/tf1/glm_nb/external.py deleted file mode 100644 index 5f04c9cf..00000000 --- a/batchglm/train/tf1/glm_nb/external.py +++ /dev/null @@ -1,17 +0,0 @@ -import batchglm.data as data_utils - -from batchglm.models.glm_nb import _EstimatorGLM, InputDataGLM, Model -from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_nb.utils import init_par - -import batchglm.train.tf1.train as train_utils -from batchglm.train.tf1.base import TFEstimatorGraph - -from batchglm.train.tf1.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM -from batchglm.train.tf1.base_glm import ProcessModelGLM, ModelVarsGLM -from batchglm.train.tf1.base_glm import HessiansGLM, FIMGLM, JacobiansGLM - -from batchglm.train.tf1.base_glm_all import TFEstimatorGLM, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL - -from batchglm.utils.linalg import groupwise_solve_lm -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/glm_nb/fim.py b/batchglm/train/tf1/glm_nb/fim.py deleted file mode 100644 index 32248733..00000000 --- a/batchglm/train/tf1/glm_nb/fim.py +++ /dev/null @@ -1,43 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import FIMGLMALL - -logger = logging.getLogger(__name__) - - -class FIM(FIMGLMALL): - - def _weight_fim_aa( - self, - loc, - scale - ): - const = tf.divide(scale, scale + loc) - W = tf.multiply(loc, const) - - return W - - def _weight_fim_bb( - self, - loc, - scale - ): - return tf.zeros_like(scale) - #scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - #scalar_two = tf.constant(2, shape=(), dtype=self.dtype) - #scale_plus_loc = scale + loc - #digamma_r = tf.math.digamma(x=scale) - #digamma_r_plus_mu = tf.math.digamma(x=scale_plus_loc) - #const1 = 
tf.multiply(scalar_two, tf.add( - # digamma_r, - # digamma_r_plus_mu - #)) - #const2 = tf.multiply(scale, tf.add( - # tf.math.polygamma(a=scalar_one, x=scale), - # tf.math.polygamma(a=scalar_one, x=scale_plus_loc) - #)) - #const3 = tf.divide(scale, scale_plus_loc) - #W = tf.multiply(scale, tf.add_n([const1, const2, const3])) - #return W diff --git a/batchglm/train/tf1/glm_nb/hessians.py b/batchglm/train/tf1/glm_nb/hessians.py deleted file mode 100644 index 87562c26..00000000 --- a/batchglm/train/tf1/glm_nb/hessians.py +++ /dev/null @@ -1,93 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import HessianGLMALL - -logger = logging.getLogger(__name__) - - -class Hessians(HessianGLMALL): - - def _weight_hessian_ab( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - X_minus_mu = tf.sparse.add(X, -loc) - else: - X_minus_mu = X - loc - - const = tf.multiply( - loc * scale, - tf.divide( - X_minus_mu, - tf.square(loc + scale) - ) - ) - - return const - - def _weight_hessian_aa( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - X_by_scale_plus_one = tf.sparse.add(X.__div__(scale), tf.ones_like(scale)) - else: - X_by_scale_plus_one = X / scale + tf.ones_like(scale) - - const = tf.negative(tf.multiply( - loc, - tf.divide( - X_by_scale_plus_one, - tf.square((loc / scale) + tf.ones_like(loc)) - ) - )) - - return const - - def _weight_hessian_bb( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - scale_plus_x = tf.sparse.add(X, scale) - else: - scale_plus_x = X + scale - - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - scalar_two = tf.constant(2, shape=(), dtype=self.dtype) - # Pre-define sub-graphs that are used multiple times: - scale_plus_loc = scale + loc - # Define graphs for individual terms of constant term of hessian: - const1 = tf.add( - tf.math.digamma(x=scale_plus_x), - scale * tf.math.polygamma(a=scalar_one, x=scale_plus_x) - ) - const2 = tf.negative(tf.add( - tf.math.digamma(x=scale), - scale * tf.math.polygamma(a=scalar_one, x=scale) - )) - const3 = tf.negative(tf.divide( - tf.add( - loc * scale_plus_x, - scalar_two * scale * scale_plus_loc - ), - tf.math.square(scale_plus_loc) - )) - const4 = tf.add( - tf.math.log(scale), - scalar_two - tf.math.log(scale_plus_loc) - ) - const = tf.add_n([const1, const2, const3, const4]) - const = tf.multiply(scale, const) - return const - - diff --git a/batchglm/train/tf1/glm_nb/jacobians.py b/batchglm/train/tf1/glm_nb/jacobians.py deleted file mode 100644 index 59c6b174..00000000 --- a/batchglm/train/tf1/glm_nb/jacobians.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import JacobiansGLMALL - -logger = logging.getLogger(__name__) - - -class Jacobians(JacobiansGLMALL): - - def _weights_jac_a( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - const = tf.multiply( - tf.sparse.add(X, scale), - tf.divide( - loc, - tf.add(loc, scale) - ) - ) - const = tf.sparse.add(X, -const) - else: - const = tf.multiply( - tf.add(X, scale), - tf.divide( - loc, - tf.add(loc, scale) - ) - ) - const = tf.subtract(X, const) - return const - - def _weights_jac_b( - self, - X, - loc, - scale, - ): - # Pre-define sub-graphs that are used multiple times: - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - if isinstance(X, tf.SparseTensor): - scale_plus_x = tf.sparse.add(X, scale) - else: - scale_plus_x = scale + X - - r_plus_mu = scale + loc - - # Define graphs for individual terms of constant term of hessian: 
- const1 = tf.subtract( - tf.math.digamma(x=scale_plus_x), - tf.math.digamma(x=scale) - ) - const2 = tf.negative(scale_plus_x / r_plus_mu) - const3 = tf.add( - tf.math.log(scale), - scalar_one - tf.math.log(r_plus_mu) - ) - const = tf.add_n([const1, const2, const3]) # [observations, features] - const = scale * const - - return const diff --git a/batchglm/train/tf1/glm_nb/model.py b/batchglm/train/tf1/glm_nb/model.py deleted file mode 100644 index fcec89ae..00000000 --- a/batchglm/train/tf1/glm_nb/model.py +++ /dev/null @@ -1,136 +0,0 @@ -import logging -import numpy as np -import tensorflow as tf - -from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class ProcessModel(ProcessModelGLM): - - def param_bounds( - self, - dtype - ): - if isinstance(dtype, tf.DType): - dmin = dtype.min - dmax = dtype.max - dtype = dtype.as_numpy_dtype - else: - dtype = np.dtype(dtype) - dmin = np.finfo(dtype).min - dmax = np.finfo(dtype).max - dtype = dtype.type - - sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) - bounds_min = { - "a_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "mu": np.nextafter(0, np.inf, dtype=dtype), - "r": np.nextafter(0, np.inf, dtype=dtype), - "probs": dtype(0), - "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), - } - bounds_max = { - "a_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mu": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "r": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), - } - return bounds_min, bounds_max - - -class ModelVars(ProcessModel, ModelVarsGLM): - """ - Full class. 
- """ - - -class BasicModelGraph(ProcessModel, BasicModelGraphGLM): - - def __init__( - self, - X, - design_loc, - design_scale, - constraints_loc, - constraints_scale, - a_var, - b_var, - dtype, - size_factors=None - ): - a_var = self.tf_clip_param(a_var, "a_var") - b_var = self.tf_clip_param(b_var, "b_var") - - if constraints_loc is not None: - eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) - else: - eta_loc = tf.matmul(design_loc, a_var) - - if size_factors is not None: - eta_loc = tf.add(eta_loc, tf.math.log(size_factors)) - - eta_loc = self.tf_clip_param(eta_loc, "eta_loc") - - if constraints_scale is not None: - eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) - else: - eta_scale = tf.matmul(design_scale, b_var) - - eta_scale = self.tf_clip_param(eta_scale, "eta_scale") - - # Inverse linker functions: - model_loc = tf.math.exp(eta_loc) - model_scale = tf.math.exp(eta_scale) - - # Log-likelihood: - log_r_plus_mu = tf.math.log(model_scale + model_loc) - if isinstance(X, tf.SparseTensor): - log_probs_sparse = X.__mul__(eta_loc - log_r_plus_mu) - log_probs_dense = tf.math.lgamma(tf.sparse.add(X, model_scale)) - \ - tf.math.lgamma(tf.sparse.add(X, tf.ones(shape=X.dense_shape, dtype=dtype))) - \ - tf.math.lgamma(model_scale) + \ - tf.multiply(model_scale, eta_scale - log_r_plus_mu) - log_probs = tf.sparse.add(log_probs_sparse, log_probs_dense) - log_probs.set_shape([None, a_var.shape[1]]) # Need this so as shape is completely lost. - else: - log_probs = tf.math.lgamma(model_scale + X) - \ - tf.math.lgamma(X + tf.ones_like(X)) - \ - tf.math.lgamma(model_scale) + \ - tf.multiply(X, eta_loc - log_r_plus_mu) + \ - tf.multiply(model_scale, eta_scale - log_r_plus_mu) - - log_probs = self.tf_clip_param(log_probs, "log_probs") - - # Variance: - sigma2 = model_loc + tf.multiply(tf.square(model_loc), model_scale) - - self.X = X - self.design_loc = design_loc - self.design_scale = design_scale - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - self.a_var = a_var - self.b_var = b_var - self.size_factors = size_factors - self.dtype = dtype - - self.eta_loc = eta_loc - self.eta_scale = eta_scale - self.model_loc = model_loc - self.model_scale = model_scale - self.mu = model_loc - self.r = model_scale - - self.log_probs = log_probs - - self.sigma2 = sigma2 diff --git a/batchglm/train/tf1/glm_nb/reducible_tensors.py b/batchglm/train/tf1/glm_nb/reducible_tensors.py deleted file mode 100644 index 862ccaf8..00000000 --- a/batchglm/train/tf1/glm_nb/reducible_tensors.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -from .external import ReducableTensorsGLMALL -from .hessians import Hessians -from .jacobians import Jacobians -from .fim import FIM - -logger = logging.getLogger("batchglm") - - -class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): - """ - """ diff --git a/batchglm/train/tf1/glm_nb/training_strategies.py b/batchglm/train/tf1/glm_nb/training_strategies.py deleted file mode 100644 index d9e57377..00000000 --- a/batchglm/train/tf1/glm_nb/training_strategies.py +++ /dev/null @@ -1,27 +0,0 @@ -from enum import Enum - - -class TrainingStrategies(Enum): - - AUTO = None - DEFAULT = [ - { - "convergence_criteria": "all_converged", - "use_batching": False, - "optim_algo": "irls_gd_tr", - }, - ] - IRLS = [ - { - "convergence_criteria": "all_converged", - "use_batching": False, - "optim_algo": "irls_gd_tr", - }, - ] - IRLS_BATCHED = [ - { - "convergence_criteria": "all_converged", - "use_batching": True, - 
"optim_algo": "irls_gd_tr", - }, - ] diff --git a/batchglm/train/tf1/glm_norm/__init__.py b/batchglm/train/tf1/glm_norm/__init__.py deleted file mode 100644 index 4db081bb..00000000 --- a/batchglm/train/tf1/glm_norm/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .estimator import Estimator -from .estimator_graph import EstimatorGraph -from .model import BasicModelGraph, ModelVars, ProcessModel -from .hessians import Hessians -from .fim import FIM -from .jacobians import Jacobians -from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf1/glm_norm/estimator.py b/batchglm/train/tf1/glm_norm/estimator.py deleted file mode 100644 index bd1778ac..00000000 --- a/batchglm/train/tf1/glm_norm/estimator.py +++ /dev/null @@ -1,325 +0,0 @@ -import logging -import numpy as np -import scipy.sparse -import tensorflow as tf -from typing import Union - -from .external import TFEstimatorGLM, InputDataGLM, Model -from .external import closedform_norm_glm_mean, closedform_norm_glm_logsd -from .estimator_graph import EstimatorGraph -from .model import ProcessModel -from .training_strategies import TrainingStrategies - -logger = logging.getLogger("batchglm") - - -class Estimator(TFEstimatorGLM, ProcessModel): - """ - Estimator for Generalized Linear Models (GLMs) with normal distributed noise. - Uses the identity function as linker function for loc and a log-linker function for scale. - """ - - def __init__( - self, - input_data: InputDataGLM, - batch_size: int = 512, - graph: tf.Graph = None, - init_model: Model = None, - init_a: Union[np.ndarray, str] = "AUTO", - init_b: Union[np.ndarray, str] = "AUTO", - quick_scale: bool = False, - model: EstimatorGraph = None, - provide_optimizers: dict = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, - "irls": True, - "irls_gd": True, - "irls_tr": True, - "irls_gd_tr": True, - }, - provide_batched: bool = False, - provide_fim: bool = False, - provide_hessian: bool = False, - optim_algos: list = [], - extended_summary=False, - dtype="float64" - ): - """ - Performs initialisation and creates a new estimator. - - :param input_data: InputData - The input data - :param batch_size: int - Size of mini-batches used. - :param graph: (optional) tf1.Graph - :param init_model: (optional) - If provided, this model will be used to initialize this Estimator. - :param init_a: (Optional) - Low-level initial values for a. Can be: - - - str: - * "auto": automatically choose best initialization - * "all zero": initialize with zeros - * "random": initialize with random values - * "standard": initialize intercept with observed mean - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'a' - :param init_b: (Optional) - Low-level initial values for b. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize with zeros - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'b' - :param quick_scale: bool - Whether `scale` will be fitted faster and maybe less accurate. - Useful in scenarios where fitting the exact `scale` is not absolutely necessary. - :param model: EstimatorGraph - EstimatorGraph to use. Basically for debugging. - :param provide_optimizers: - - E.g. 
{"gd": False, "adam": False, "adagrad": False, "rmsprop": False, - "nr": False, "nr_tr": True, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - :param provide_batched: bool - Whether mini-batched optimizers should be provided. - :param provide_fim: Whether to compute fisher information matrix during training - Either supply provide_fim and provide_hessian or optim_algos. - :param provide_hessian: Whether to compute hessians during training - Either supply provide_fim and provide_hessian or optim_algos. - :param optim_algos: Algorithms that you want to use on this object. Depending on that, - the hessian and/or fisher information matrix are computed. - Either supply provide_fim and provide_hessian or optim_algos. - :param extended_summary: Include detailed information in the summaries. - Will increase runtime of summary writer, use only for debugging. - :param dtype: Precision used in tensorflow. - """ - self.TrainingStrategies = TrainingStrategies - - self._input_data = input_data - self._train_loc = True - self._train_scale = True - - (init_a, init_b) = self.init_par( - input_data=input_data, - init_a=init_a, - init_b=init_b, - init_model=init_model - ) - init_a = init_a.astype(dtype) - init_b = init_b.astype(dtype) - if quick_scale: - self._train_scale = False - - if len(optim_algos) > 0: - if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): - provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr"] for x in optim_algos]): - provide_fim = True - - TFEstimatorGLM.__init__( - self=self, - input_data=input_data, - batch_size=batch_size, - graph=graph, - init_a=init_a, - init_b=init_b, - model=model, - provide_optimizers=provide_optimizers, - provide_batched=provide_batched, - provide_fim=provide_fim, - provide_hessian=provide_hessian, - extended_summary=extended_summary, - noise_model="norm", - dtype=dtype - ) - - def get_model_container( - self, - input_data - ): - return Model(input_data=input_data) - - def init_par( - self, - input_data, - init_a, - init_b, - init_model - ): - r""" - standard: - Only initialise intercept and keep other coefficients as zero. - - closed-form: - Initialize with Maximum Likelihood / Maximum of Momentum estimators - - Idea: - $$ - \theta &= f(x) \\ - \Rightarrow f^{-1}(\theta) &= x \\ - &= (D \cdot D^{+}) \cdot x \\ - &= D \cdot (D^{+} \cdot x) \\ - &= D \cdot x' = f^{-1}(\theta) - $$ - """ - - sf_given = False - if input_data.size_factors is not None: - if np.any(np.abs(input_data.size_factors - 1.) > 1e-8): - sf_given = True - - is_ols_model = input_data.design_scale.shape[1] == 1 and \ - np.all(np.abs(input_data.design_scale - 1.) < 1e-8) and \ - not sf_given - - if init_model is None: - groupwise_means = None - init_a_str = None - if isinstance(init_a, str): - init_a_str = init_a.lower() - # Chose option if auto was chosen - if init_a.lower() == "auto": - init_a = "closed_form" - - if init_a.lower() == "closed_form" or init_a.lower() == "standard": - design_constr = np.matmul(input_data.design_loc, input_data.constraints_loc) - # Iterate over genes if X is sparse to avoid large sparse tensor. - # If X is dense, the least square problem can be vectorised easily. - if isinstance(input_data.x, scipy.sparse.csr_matrix): - init_a, rmsd_a, _, _ = np.linalg.lstsq( - np.matmul(design_constr.T, design_constr), - input_data.x.T.dot(design_constr).T, # need double .T because of dot product on sparse. 
- rcond=None - ) - else: - init_a, rmsd_a, _, _ = np.linalg.lstsq( - np.matmul(design_constr.T, design_constr), - np.matmul(design_constr.T, input_data.x), - rcond=None - ) - groupwise_means = None - if is_ols_model: - self._train_loc = False - - logger.debug("Using OLS initialization for location model") - elif init_a.lower() == "all_zero": - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - self._train_loc = True - - logger.debug("Using all_zero initialization for mean") - else: - raise ValueError("init_a string %s not recognized" % init_a) - logger.debug("Should train location model: %s", self._train_loc) - - if isinstance(init_b, str): - if init_b.lower() == "auto": - init_b = "standard" - - if is_ols_model: - # Calculated variance via E(x)^2 or directly depending on whether `mu` was specified. - if isinstance(input_data.x, scipy.sparse.csr_matrix): - expect_xsq = np.asarray(np.mean(input_data.x.power(2), axis=0)) - else: - expect_xsq = np.expand_dims(np.mean(np.square(input_data.x), axis=0), axis=0) - mean_model = np.matmul( - np.matmul(input_data.design_loc, input_data.constraints_loc), - init_a - ) - expect_x_sq = np.mean(np.square(mean_model), axis=0) - variance = (expect_xsq - expect_x_sq) - init_b = np.log(np.sqrt(variance)) - self._train_scale = False - - logger.debug("Using residuals from OLS estimate for variance estimate") - elif init_b.lower() == "closed_form": - dmats_unequal = False - if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: - if np.any(input_data.design_loc != input_data.design_scale): - dmats_unequal = True - - inits_unequal = False - if init_a_str is not None: - if init_a_str != init_b: - inits_unequal = True - - # Watch out: init_mean is full obs x features matrix and is very large in many cases. 
- if inits_unequal or dmats_unequal: - raise ValueError("cannot use closed_form init for scale model " + - "if scale model differs from loc model") - - groupwise_scales, init_b, rmsd_b = closedform_norm_glm_logsd( - x=input_data.x, - design_scale=input_data.design_scale, - constraints=input_data.constraints_scale, - size_factors=input_data.size_factors, - groupwise_means=groupwise_means, - link_fn=lambda sd: np.log(self.np_clip_param(sd, "sd")) - ) - - # train scale, if the closed-form solution is inaccurate - self._train_scale = not (np.all(rmsd_b == 0) or rmsd_b.size == 0) - - logger.debug("Using closed-form MME initialization for standard deviation") - elif init_b.lower() == "standard": - groupwise_scales, init_b_intercept, rmsd_b = closedform_norm_glm_logsd( - x=input_data.x, - design_scale=input_data.design_scale[:, [0]], - constraints=input_data.constraints_scale[[0], :][:, [0]], - size_factors=input_data.size_factors, - groupwise_means=None, - link_fn=lambda sd: np.log(self.np_clip_param(sd, "sd")) - ) - init_b = np.zeros([input_data.num_scale_params, input_data.num_features]) - init_b[0, :] = init_b_intercept - - # train scale, if the closed-form solution is inaccurate - self._train_scale = not (np.all(rmsd_b == 0) or rmsd_b.size == 0) - - logger.debug("Using closed-form MME initialization for standard deviation") - logger.debug("Should train sd: %s", self._train_scale) - elif init_b.lower() == "all_zero": - init_b = np.zeros([input_data.num_scale_params, input_data.num_features]) - - logger.debug("Using standard initialization for standard deviation") - else: - raise ValueError("init_b string %s not recognized" % init_b) - logger.debug("Should train sd: %s", self._train_scale) - else: - # Locations model: - if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): - my_loc_names = set(input_data.loc_names) - my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names)) - - init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) - for parm in my_loc_names: - init_idx = np.where(init_model.input_data.loc_names == parm)[0] - my_idx = np.where(input_data.loc_names == parm)[0] - init_loc[my_idx] = init_model.a_var[init_idx] - - init_a = init_loc - logger.debug("Using initialization based on input model for mean") - - # Scale model: - if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): - my_scale_names = set(input_data.scale_names) - my_scale_names = my_scale_names.intersection(init_model.input_data.scale_names) - - init_scale = np.zeros([input_data.num_scale_params, input_data.num_features]) - for parm in my_scale_names: - init_idx = np.where(init_model.input_data.scale_names == parm)[0] - my_idx = np.where(input_data.scale_names == parm)[0] - init_scale[my_idx] = init_model.b_var[init_idx] - - init_b = init_scale - logger.debug("Using initialization based on input model for dispersion") - - return init_a, init_b diff --git a/batchglm/train/tf1/glm_norm/estimator_graph.py b/batchglm/train/tf1/glm_norm/estimator_graph.py deleted file mode 100644 index 8e609600..00000000 --- a/batchglm/train/tf1/glm_norm/estimator_graph.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging - -from .model import ProcessModel -from .external import EstimatorGraphAll - -logger = logging.getLogger(__name__) - - -class EstimatorGraph(ProcessModel, EstimatorGraphAll): - """ - Full class. 
- """ diff --git a/batchglm/train/tf1/glm_norm/external.py b/batchglm/train/tf1/glm_norm/external.py deleted file mode 100644 index 3acba1c9..00000000 --- a/batchglm/train/tf1/glm_norm/external.py +++ /dev/null @@ -1,18 +0,0 @@ -import batchglm.data as data_utils - -from batchglm.models.glm_norm import _EstimatorGLM, InputDataGLM, Model -from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_norm.utils import closedform_norm_glm_mean, closedform_norm_glm_logsd - -import batchglm.train.tf1.ops as op_utils -import batchglm.train.tf1.train as train_utils -from batchglm.train.tf1.base import TFEstimatorGraph - -from batchglm.train.tf1.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM -from batchglm.train.tf1.base_glm import ProcessModelGLM, ModelVarsGLM -from batchglm.train.tf1.base_glm import HessiansGLM, FIMGLM, JacobiansGLM - -from batchglm.train.tf1.base_glm_all import TFEstimatorGLM, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL - -from batchglm.utils.linalg import groupwise_solve_lm -from batchglm import pkg_constants diff --git a/batchglm/train/tf1/glm_norm/fim.py b/batchglm/train/tf1/glm_norm/fim.py deleted file mode 100644 index 06fce476..00000000 --- a/batchglm/train/tf1/glm_norm/fim.py +++ /dev/null @@ -1,28 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import FIMGLMALL - -logger = logging.getLogger(__name__) - - -class FIM(FIMGLMALL): - - def _weight_fim_aa( - self, - loc, - scale - ): - W = tf.square(tf.divide(tf.ones_like(scale), scale)) - - return W - - def _weight_fim_bb( - self, - loc, - scale - ): - W = tf.constant(2, shape=loc.shape, dtype=self.dtype) - - return W diff --git a/batchglm/train/tf1/glm_norm/hessians.py b/batchglm/train/tf1/glm_norm/hessians.py deleted file mode 100644 index 69238c12..00000000 --- a/batchglm/train/tf1/glm_norm/hessians.py +++ /dev/null @@ -1,66 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import HessianGLMALL - -logger = logging.getLogger(__name__) - - -class Hessians(HessianGLMALL): - - def _weight_hessian_ab( - self, - X, - loc, - scale, - ): - scalar_two = tf.constant(2, shape=(), dtype=self.dtype) - if isinstance(X, tf.SparseTensor): - X_minus_loc = tf.sparse.add(X, -loc) - else: - X_minus_loc = X - loc - - const = - tf.multiply(scalar_two, - tf.divide( - X_minus_loc, - tf.square(scale) - ) - ) - return const - - def _weight_hessian_aa( - self, - X, - loc, - scale, - ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = - tf.divide(scalar_one, tf.square(scale)) - - return const - - def _weight_hessian_bb( - self, - X, - loc, - scale, - ): - scalar_two = tf.constant(2, shape=(), dtype=self.dtype) - if isinstance(X, tf.SparseTensor): - X_minus_loc = tf.sparse.add(X, -loc) - else: - X_minus_loc = X - loc - - const = - tf.multiply( - scalar_two, - tf.math.square( - tf.divide( - X_minus_loc, - scale - ) - ) - ) - return const - - diff --git a/batchglm/train/tf1/glm_norm/jacobians.py b/batchglm/train/tf1/glm_norm/jacobians.py deleted file mode 100644 index 04a60d88..00000000 --- a/batchglm/train/tf1/glm_norm/jacobians.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import JacobiansGLMALL - -logger = logging.getLogger(__name__) - - -class Jacobians(JacobiansGLMALL): - - def _weights_jac_a( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor): - const1 = 
tf.sparse.add(X, -loc) - const = tf.divide(const1, tf.square(scale)) - else: - const1 = tf.subtract(X, loc) - const = tf.divide(const1, tf.square(scale)) - return const - - def _weights_jac_b( - self, - X, - loc, - scale, - ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - if isinstance(X, tf.SparseTensor): - const = tf.negative(scalar_one) + tf.math.square( - tf.divide(tf.sparse.add(X, -loc), scale) - ) - else: - const = tf.negative(scalar_one) + tf.math.square( - tf.divide(tf.subtract(X, loc), scale) - ) - return const diff --git a/batchglm/train/tf1/glm_norm/model.py b/batchglm/train/tf1/glm_norm/model.py deleted file mode 100644 index 0ac6efc0..00000000 --- a/batchglm/train/tf1/glm_norm/model.py +++ /dev/null @@ -1,138 +0,0 @@ -import logging - -import tensorflow as tf - -import numpy as np - -from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class ProcessModel(ProcessModelGLM): - - def param_bounds( - self, - dtype - ): - if isinstance(dtype, tf.DType): - dmin = dtype.min - dmax = dtype.max - dtype = dtype.as_numpy_dtype - else: - dtype = np.dtype(dtype) - dmin = np.finfo(dtype).min - dmax = np.finfo(dtype).max - dtype = dtype.type - - sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) - bounds_min = { - "a_var": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, - "b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "eta_loc": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, - "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "mean": np.nextafter(-dmax, np.inf, dtype=dtype) / sf, - "sd": np.nextafter(0, np.inf, dtype=dtype), - "probs": dtype(0), - "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), - } - bounds_max = { - "a_var": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_loc": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mean": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "sd": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), - } - return bounds_min, bounds_max - - -class ModelVars(ProcessModel, ModelVarsGLM): - """ - Full class. 
- """ - - -class BasicModelGraph(ProcessModel, BasicModelGraphGLM): - - def __init__( - self, - X, - design_loc, - design_scale, - constraints_loc, - constraints_scale, - a_var, - b_var, - dtype, - size_factors=None - ): - a_var = self.tf_clip_param(a_var, "a_var") - b_var = self.tf_clip_param(b_var, "b_var") - - if constraints_loc is not None: - eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) - else: - eta_loc = tf.matmul(design_loc, a_var) - - if size_factors is not None: - eta_loc = tf.multiply(eta_loc, size_factors) - - eta_loc = self.tf_clip_param(eta_loc, "eta_loc") - - if constraints_scale is not None: - eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) - else: - eta_scale = tf.matmul(design_scale, b_var) - - eta_scale = self.tf_clip_param(eta_scale, "eta_scale") - - # Inverse linker functions: - model_loc = eta_loc - model_scale = tf.math.exp(eta_scale) - - # Log-likelihood: - const = tf.constant(-0.5 * np.log(2 * np.pi), shape=(), dtype=dtype) - if isinstance(X, tf.SparseTensor): - log_probs = const - \ - eta_scale - \ - 0.5 * tf.math.square(tf.divide( - tf.sparse.add(X, - model_loc), - model_scale - )) - log_probs.set_shape([None, a_var.shape[1]]) # Need this so as shape is completely lost. - else: - log_probs = const - \ - eta_scale - \ - 0.5 * tf.math.square(tf.divide( - X - model_loc, - model_scale - )) - log_probs = self.tf_clip_param(log_probs, "log_probs") - - # Variance: - sigma2 = tf.square(model_scale) - - self.X = X - self.design_loc = design_loc - self.design_scale = design_scale - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - self.a_var = a_var - self.b_var = b_var - self.size_factors = size_factors - self.dtype = dtype - - self.eta_loc = eta_loc - self.eta_scale = eta_scale - self.model_loc = model_loc - self.model_scale = model_scale - self.mean = model_loc - self.sd = model_scale - - self.log_probs = log_probs - - self.sigma2 = sigma2 diff --git a/batchglm/train/tf1/glm_norm/reducible_tensors.py b/batchglm/train/tf1/glm_norm/reducible_tensors.py deleted file mode 100644 index 862ccaf8..00000000 --- a/batchglm/train/tf1/glm_norm/reducible_tensors.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -from .external import ReducableTensorsGLMALL -from .hessians import Hessians -from .jacobians import Jacobians -from .fim import FIM - -logger = logging.getLogger("batchglm") - - -class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): - """ - """ diff --git a/batchglm/train/tf1/glm_norm/training_strategies.py b/batchglm/train/tf1/glm_norm/training_strategies.py deleted file mode 100644 index 2ba524a7..00000000 --- a/batchglm/train/tf1/glm_norm/training_strategies.py +++ /dev/null @@ -1,27 +0,0 @@ -from enum import Enum - - -class TrainingStrategies(Enum): - - AUTO = None - DEFAULT = [ - { - "convergence_criteria": "all_converged", - "use_batching": False, - "optim_algo": "irls_tr", - }, - ] - IRLS = [ - { - "convergence_criteria": "all_converged", - "use_batching": False, - "optim_algo": "irls_tr", - }, - ] - IRLS_BATCHED = [ - { - "convergence_criteria": "all_converged", - "use_batching": True, - "optim_algo": "irls_tr", - }, - ] diff --git a/batchglm/train/tf1/ops.py b/batchglm/train/tf1/ops.py deleted file mode 100644 index 8c6ea45f..00000000 --- a/batchglm/train/tf1/ops.py +++ /dev/null @@ -1,59 +0,0 @@ -import tensorflow as tf -from typing import Union - - -def swap_dims(tensor, axis0, axis1, exec_transpose=True, return_perm=False, name="swap_dims"): - """ - Swaps two 
dimensions in a given tensor. - - :param tensor: The tensor whose axes should be swapped - :param axis0: The first axis which should be swapped with `axis1` - :param axis1: The second axis which should be swapped with `axis0` - :param exec_transpose: Should the transpose operation be applied? - :param return_perm: Should the permutation argument for `tf1.transpose` be returned? - Autmoatically true, if `exec_transpose` is False - :param name: The name scope of this op - :return: either retval, (retval, permutation) or permutation - """ - with tf.name_scope(name): - rank = tf.range(tf.rank(tensor)) - idx0 = rank[axis0] - idx1 = rank[axis1] - perm0 = tf.where(tf.equal(rank, idx0), tf.tile(tf.expand_dims(idx1, 0), [tf.size(rank)]), rank) - perm1 = tf.where(tf.equal(rank, idx1), tf.tile(tf.expand_dims(idx0, 0), [tf.size(rank)]), perm0) - - if exec_transpose: - retval = tf.transpose(tensor, perm1) - - if return_perm: - return retval, perm1 - else: - return retval - else: - return perm1 - - -def stacked_lstsq(L, b, rcond=1e-10, name="stacked_lstsq"): - r""" - Solve `Lx = b`, via SVD least squares cutting of small singular values - - :param L: tensor of shape (..., M, K) - :param b: tensor of shape (..., M, N). - :param rcond: threshold for inverse - :param name: name scope of this op - :return: x of shape (..., K, N) - """ - with tf.name_scope(name): - u, s, v = tf.linalg.svd(L, full_matrices=False) - s_max = s.max(axis=-1, keepdims=True) - s_min = rcond * s_max - - inv_s = tf.where(s >= s_min, tf.reciprocal(s), 0) - - x = tf.einsum( - '...MK,...MN->...KN', - v, - tf.einsum('...K,...MK,...MN->...KN', inv_s, u, b) - ) - - return tf.conj(x) diff --git a/batchglm/train/tf1/train.py b/batchglm/train/tf1/train.py deleted file mode 100644 index 151343f0..00000000 --- a/batchglm/train/tf1/train.py +++ /dev/null @@ -1,315 +0,0 @@ -import contextlib -import logging -import tensorflow as tf -from typing import Union, Dict - -logger = logging.getLogger(__name__) - - -class MultiTrainer: - - def __init__( - self, - learning_rate, - loss=None, - variables: tf.Variable = None, - gradients: tf.Tensor = None, - apply_gradients: Union[callable, Dict[tf.Variable, callable]] = None, - newton_delta: tf.Tensor = None, - irls_delta: tf.Tensor = None, - irls_gd_delta: tf.Tensor = None, - train_ops_nr_tr=None, - train_ops_irls_tr=None, - train_ops_irls_gd_tr=None, - global_step=None, - apply_train_ops: callable = None, - provide_optimizers: Union[dict, None] = None, - session = None, - name=None - ): - r""" - - :param learning_rate: learning rate used for training - :param loss: loss which should be minimized - :param variables: list of variables which will be trained - :param gradients: tensor of gradients of loss function with respect to trained parameters. - If gradients is not given, gradients are computed via tensorflow based on the given loss. - :param apply_gradients: callable(s) appliable to the gradients. - Can be either a single callable which will be applied to all gradients or a dict of - {tf1.Variable: callable} mappings. - :param newton_delta: tensor Precomputed custom newton-rhapson parameter update to apply. - :param irls_delta: tensor Precomputed custom IRLS parameter update to apply. 
- :param global_step: global step counter - :param apply_train_ops: callable which will be applied to all train ops - :param name: optional name scope - """ - self.session = session - with contextlib.ExitStack() as stack: - if name is not None: - gs = stack.enter_context(tf.name_scope(name)) - - if gradients is None: - if variables is None: - raise ValueError("Either variables and loss or gradients have to be specified") - - logger.debug(" **** Compute gradients using tensorflow") - plain_gradients = tf.gradients(loss, variables) - plain_gradients_vars = [(g, v) for g, v in zip(plain_gradients, variables)] - else: - plain_gradients_vars = [(gradients, variables)] - - if callable(apply_gradients): - gradients_vars = [(apply_gradients(g), v) for g, v in plain_gradients_vars] - elif isinstance(apply_gradients, dict): - gradients_vars = [(apply_gradients[v](g) if v in apply_gradients else g, v) for g, v in plain_gradients_vars] - else: - gradients_vars = plain_gradients_vars - - # Standard tensorflow optimizers. - if provide_optimizers["gd"]: - logger.debug(" *** Building optimizer: GD") - optim_GD = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=learning_rate) - train_op_GD = optim_GD.apply_gradients(gradients_vars, global_step=global_step) - if apply_train_ops is not None: - train_op_GD = apply_train_ops(train_op_GD) - update_op_GD = tf.multiply(gradients, learning_rate) - else: - optim_GD = None - train_op_GD = None - update_op_GD = None - - if provide_optimizers["adam"]: - logger.debug(" *** Building optimizer: ADAM") - optim_Adam = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate) - train_op_Adam = optim_Adam.apply_gradients(gradients_vars, global_step=global_step) - if apply_train_ops is not None: - train_op_Adam = apply_train_ops(train_op_Adam) - update_op_Adam = tf.multiply(gradients, learning_rate) # TODO replace by actual step - else: - optim_Adam = None - train_op_Adam = None - update_op_Adam = None - - if provide_optimizers["adagrad"]: - logger.debug(" *** Building optimizer: ADAGRAD") - optim_Adagrad = tf.compat.v1.train.AdagradOptimizer(learning_rate=learning_rate) - train_op_Adagrad = optim_Adagrad.apply_gradients(gradients_vars, global_step=global_step) - if apply_train_ops is not None: - train_op_Adagrad = apply_train_ops(train_op_Adagrad) - update_op_Adagrad = tf.multiply(gradients, learning_rate) # TODO replace by actual step - else: - optim_Adagrad = None - train_op_Adagrad = None - update_op_Adagrad = None - - if provide_optimizers["rmsprop"]: - logger.debug(" *** Building optimizer: RMSPROP") - optim_RMSProp = tf.compat.v1.train.RMSPropOptimizer(learning_rate=learning_rate) - train_op_RMSProp = optim_RMSProp.apply_gradients(gradients_vars, global_step=global_step) - if apply_train_ops is not None: - train_op_RMSProp = apply_train_ops(train_op_RMSProp) - update_op_RMSProp = tf.multiply(gradients, learning_rate) # TODO replace by actual step - else: - optim_RMSProp = None - train_op_RMSProp = None - update_op_RMSProp = None - - # Custom optimizers. 
- if provide_optimizers["nr"] and newton_delta is not None: - logger.debug(" *** Building optimizer: NR") - update_op_nr = newton_delta - - theta_new_nr = variables - newton_delta - train_op_nr = tf.group( - tf.compat.v1.assign(variables, theta_new_nr), - tf.compat.v1.assign_add(global_step, 1) - ) - if apply_train_ops is not None: - train_op_nr = apply_train_ops(train_op_nr) - else: - train_op_nr = None - update_op_nr = None - - if provide_optimizers["irls"] and irls_delta is not None: - logger.debug(" *** Building optimizer: IRLS") - update_op_irls = irls_delta - - theta_new_irls = variables - irls_delta - train_op_irls = tf.group( - tf.compat.v1.assign(variables, theta_new_irls), - tf.compat.v1.assign_add(global_step, 1) - ) - if apply_train_ops is not None: - train_op_irls = apply_train_ops(train_op_irls) - else: - train_op_irls = None - update_op_irls = None - - if provide_optimizers["irls_gd"] and irls_gd_delta is not None: - logger.debug(" *** Building optimizer: IRLS_GD") - update_op_irls_gd = irls_gd_delta - - theta_new_irls_gd = variables - irls_gd_delta - train_op_irls_gd = tf.group( - tf.compat.v1.assign(variables, theta_new_irls_gd), - tf.compat.v1.assign_add(global_step, 1) - ) - if apply_train_ops is not None: - train_op_irls_gd = apply_train_ops(train_op_irls_gd) - else: - train_op_irls_gd = None - update_op_irls_gd = None - - if provide_optimizers["nr_tr"] and train_ops_nr_tr is not None: - logger.debug(" *** Building optimizer: NR_TR") - train_op_nr_tr = {"trial_op": train_ops_nr_tr["trial_op"], - "update_op": tf.group(train_ops_nr_tr["update_op"], - tf.compat.v1.assign_add(global_step, 1))} - update_op_nr_tr = train_ops_nr_tr["update"] - else: - train_op_nr_tr = None - update_op_nr_tr = None - - if provide_optimizers["irls_tr"] and train_ops_irls_tr is not None: - logger.debug(" *** Building optimizer: IRLS_TR") - train_op_irls_tr = {"trial_op": train_ops_irls_tr["trial_op"], - "update_op": tf.group(train_ops_irls_tr["update_op"], - tf.compat.v1.assign_add(global_step, 1))} - update_op_irls_tr = train_ops_irls_tr["update"] - else: - train_op_irls_tr = None - update_op_irls_tr = None - - if provide_optimizers["irls_gd_tr"] and train_ops_irls_gd_tr is not None: - logger.debug(" *** Building optimizer: IRLS_GD_TR") - train_op_irls_gd_tr = {"trial_op": train_ops_irls_gd_tr["trial_op"], - "update_op": tf.group(train_ops_irls_gd_tr["update_op"], - tf.compat.v1.assign_add(global_step, 1))} - update_op_irls_gd_tr = train_ops_irls_gd_tr["update"] - else: - train_op_irls_gd_tr = None - update_op_irls_gd_tr = None - - self.global_step = global_step - self.plain_gradients = plain_gradients_vars - self.gradients = gradients_vars - - self.optim_GD = optim_GD - self.optim_Adam = optim_Adam - self.optim_Adagrad = optim_Adagrad - self.optim_RMSProp = optim_RMSProp - - self.train_op_GD = train_op_GD - self.train_op_Adam = train_op_Adam - self.train_op_Adagrad = train_op_Adagrad - self.train_op_RMSProp = train_op_RMSProp - self.train_op_nr = train_op_nr - self.train_op_nr_tr = train_op_nr_tr - self.train_op_irls = train_op_irls - self.train_op_irls_gd = train_op_irls_gd - self.train_op_irls_tr = train_op_irls_tr - self.train_op_irls_gd_tr = train_op_irls_gd_tr - - self.update_op_GD = update_op_GD - self.update_op_Adam = update_op_Adam - self.update_op_Adagrad = update_op_Adagrad - self.update_op_RMSProp = update_op_RMSProp - self.update_op_nr = update_op_nr - self.update_op_nr_tr = update_op_nr_tr - self.update_op_irls = update_op_irls - self.update_op_irls_gd = update_op_irls_gd - 
self.update_op_irls_tr = update_op_irls_tr - self.update_op_irls_gd_tr = update_op_irls_gd_tr - - #self.train_op_bfgs = train_op_bfgs - - - def train_op_by_name(self, name: str): - """ - Returns the train op specified by the provided name - - :param name: name of the requested train op. Can be: - - - "Adam" - - "Adagrad" - - "RMSprop" - - "GradientDescent" or "GD" - :return: train op - """ - name_lower = name.lower() - if name_lower == "gradient_descent" or name_lower == "gd": - if self.train_op_GD is None: - raise ValueError("Gradient decent not provided in initialization.") - return {"train": self.train_op_GD, "update": self.update_op_GD} - elif name_lower == "adam": - if self.train_op_Adam is None: - raise ValueError("Adam not provided in initialization.") - return {"train": self.train_op_Adam, "update": self.update_op_Adam} - elif name_lower == "adagrad": - if self.train_op_Adagrad is None: - raise ValueError("Adagrad decent not provided in initialization.") - return {"train": self.train_op_Adagrad, "update": self.update_op_Adagrad} - elif name_lower == "rmsprop": - if self.train_op_RMSProp is None: - raise ValueError("RMSProp decent not provided in initialization.") - return {"train": self.train_op_RMSProp, "update": self.update_op_RMSProp} - elif name_lower == "bfgs": - if self.train_op_bfgs is None: - raise ValueError("BFGS not provided in initialization.") - return {"train": self.train_op_bfgs, "update": self.update_op_bfgs} - elif name_lower.lower() == "newton" or \ - name_lower.lower() == "newton_raphson" or \ - name_lower.lower() == "nr": - if self.train_op_nr is None: - raise ValueError("Newton-rhapson not provided in initialization.") - return {"train": self.train_op_nr, "update": self.update_op_nr} - elif name_lower.lower() == "newton_tr" or \ - name_lower.lower() == "newton_raphson_tr" or \ - name_lower.lower() == "nr_tr": - if self.train_op_nr_tr is None: - raise ValueError("Newton-rhapson trust-region not provided in initialization.") - return {"train": self.train_op_nr_tr, "update": self.update_op_nr_tr} - elif name_lower.lower() == "irls" or \ - name_lower.lower() == "iwls": - if self.train_op_irls is None: - raise ValueError("IRLS not provided in initialization.") - return {"train": self.train_op_irls, "update": self.update_op_irls} - elif name_lower.lower() == "irls_gd" or \ - name_lower.lower() == "iwls_gd": - if self.train_op_irls_gd is None: - raise ValueError("IRLS_GD not provided in initialization.") - return {"train": self.train_op_irls_gd, "update": self.update_op_irls_gd} - elif name_lower.lower() == "irls_tr" or \ - name_lower.lower() == "iwls_tr": - if self.train_op_irls_tr is None: - raise ValueError("IRLS trust-region not provided in initialization.") - return {"train": self.train_op_irls_tr, "update": self.update_op_irls_tr} - elif name_lower.lower() == "irls_gd_tr" or \ - name_lower.lower() == "iwls_gd_tr": - if self.train_op_irls_gd_tr is None: - raise ValueError("IRLS_GD trust-region not provided in initialization.") - return {"train": self.train_op_irls_gd_tr, "update": self.update_op_irls_gd_tr} - else: - raise ValueError("Unknown optimizer %s" % name) - - def gradient_by_variable(self, variable: tf.Variable): - """ - Returns the gradient to a specific variable if existing in self.gradients - :param variable: the variable whose gradient is requested - :return: gradient tensor or None if not found - """ - for g, v in self.gradients: - if v is variable: - return g - return None - - def plain_gradient_by_variable(self, variable: tf.Variable): - """ - 
Returns the plain gradient to a specific variable if existing in self.plain_gradients - :param variable: the variable whose gradient is requested - :return: gradient tensor or None if not found - """ - for g, v in self.plain_gradients: - if v is variable: - return g - return None diff --git a/batchglm/unit_test/test_acc_analytic_glm_all.py b/batchglm/unit_test/test_acc_analytic_glm_all.py deleted file mode 100644 index 57e31e0d..00000000 --- a/batchglm/unit_test/test_acc_analytic_glm_all.py +++ /dev/null @@ -1,373 +0,0 @@ -import logging -import unittest -import numpy as np -import scipy.sparse - -import batchglm.api as glm -from batchglm.models.base_glm import _EstimatorGLM, _SimulatorGLM - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _TestAccuracyAnalyticGlmAllEstim(): - - estimator: _EstimatorGLM - sim: _SimulatorGLM - noise_model: str - - def __init__( - self, - simulator, - train_scale, - noise_model, - sparse, - init_a, - init_b - ): - self.sim = simulator - self.noise_model = noise_model - - if noise_model is None: - raise ValueError("noise_model is None") - else: - if noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM - elif noise_model == "norm": - from batchglm.api.models import Estimator, InputDataGLM - elif noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM - else: - raise ValueError("noise_model not recognized") - - batch_size = 500 - provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": False, "nr_tr": False, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(simulator.input_data.x), - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale - ) - else: - input_data = InputDataGLM( - data=simulator.input_data.x, - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale - ) - - self.estimator = Estimator( - input_data=input_data, - batch_size=batch_size, - quick_scale=not train_scale, - provide_optimizers=provide_optimizers, - provide_batched=True, - provide_fim=False, - provide_hessian=False, - init_a=init_a, - init_b=init_b - ) - - def eval_estimation_a( - self, - init_a, - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "norm": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - else: - raise ValueError("noise_model not recognized") - - if init_a == "standard": - mean_dev = np.mean(self.estimator.model.a_var[0, :] - self.sim.a_var[0, :]) - std_dev = np.std(self.estimator.model.a_var[0, :] - self.sim.a_var[0, :]) - elif init_a == "closed_form": - mean_dev = np.mean(self.estimator.model.a_var - self.sim.a_var) - std_dev = np.std(self.estimator.model.a_var - self.sim.a_var) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_a %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_a %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - def eval_estimation_b( - self, - init_b - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - threshold_dev = 1e-2 - 
threshold_std = 1e-1 - elif self.noise_model == "norm": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - else: - raise ValueError("noise_model not recognized") - - if init_b == "standard": - mean_dev = np.mean(self.estimator.b_var[0, :] - self.sim.b[0, :]) - std_dev = np.std(self.estimator.b_var[0, :] - self.sim.b[0, :]) - elif init_b == "closed_form": - mean_dev = np.mean(self.estimator.b_var - self.sim.b) - std_dev = np.std(self.estimator.b_var - self.sim.b) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_b %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_b %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - -class TestAccuracyAnalyticGlmAll( - unittest.TestCase -): - noise_model: str - - def get_simulator(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - return Simulator( - num_observations=100000, - num_features=3 - ) - - def get_estimator(self, train_scale, sparse, init_a, init_b): - return _TestAccuracyAnalyticGlmAllEstim( - simulator=self.sim, - train_scale=train_scale, - noise_model=self.noise_model, - sparse=sparse, - init_a=init_a, - init_b=init_b - ) - - def simulate_complex(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.7, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0, 0.15, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_scale(shape): - theta = np.zeros(shape) - if self.noise_model in ["nb"]: - theta[0, :] = np.random.uniform(1, 3, shape[1]) - elif self.noise_model in ["norm"]: - theta[0, :] = np.random.uniform(1, 2, shape[1]) - elif self.noise_model in ["beta"]: - theta[0, :] = np.random.uniform(0.2, 0.4, shape[1]) - else: - raise ValueError("noise model not recognized") - return theta - - self.sim.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim.generate_data() - - def simulate_easy(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=1) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.9, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - return np.ones(shape) - - def rand_fn_scale(shape): - theta = np.zeros(shape) - if self.noise_model in ["nb"]: - theta[0, :] = np.random.uniform(1, 3, shape[1]) - elif self.noise_model in ["norm"]: - theta[0, :] = 
np.random.uniform(1, 2, shape[1]) - elif self.noise_model in ["beta"]: - theta[0, :] = np.random.uniform(0.2, 0.4, shape[1]) - else: - raise ValueError("noise model not recognized") - return theta - - self.sim.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim.generate_data() - assert self.sim.input_data.design_loc.shape[1] == 1, "confounders include in intercept-only simulation" - assert self.sim.input_data.design_scale.shape[1] == 1, "confounders include in intercept-only simulation" - - def _test_a_and_b(self, sparse, init_a, init_b): - estimator = self.get_estimator( - train_scale=False, - sparse=sparse, - init_a=init_a, - init_b=init_b - ) - estimator.estimator.initialize() - estimator.estimator.finalize() - success = estimator.eval_estimation_a( - init_a=init_a, - ) - assert success, "estimation for a_model was inaccurate" - success = estimator.eval_estimation_b( - init_b=init_b - ) - assert success, "estimation for b_model was inaccurate" - return True - - -class TestAccuracyAnalyticGlmNb( - TestAccuracyAnalyticGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial data. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmNb.test_a_closed_b_closed()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmNb.test_a_standard_b_standard()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - - -class TestAccuracyAnalyticGlmNorm( - TestAccuracyAnalyticGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normally distributed data. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmNorm.test_a_closed_b_closed()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmNorm.test_a_standard_b_standard()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - - -class TestAccuracyAnalyticGlmBeta( - TestAccuracyAnalyticGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed data. 
- """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmBeta.test_a_closed_b_closed()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyAnalyticGlmBeta.test_a_standard_b_standard()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_acc_constrained_vglm_all.py b/batchglm/unit_test/test_acc_constrained_vglm_all.py deleted file mode 100644 index 1723a325..00000000 --- a/batchglm/unit_test/test_acc_constrained_vglm_all.py +++ /dev/null @@ -1,140 +0,0 @@ -import logging -import numpy as np -import unittest - -import batchglm.api as glm -from batchglm.unit_test.test_acc_glm_all import _TestAccuracyGlmAll - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _TestAccuracyVglmAll(_TestAccuracyGlmAll): - - def simulate(self): - super().simulate() - # Override design matrix of simulation 1 to encode constraints - dmat = np.hstack([ - self.sim1.input_data.design_loc, - np.expand_dims(self.sim1.input_data.design_loc[:, 0] - - self.sim1.input_data.design_loc[:, -1], axis=-1) - ]) - constraints = np.zeros([4, 3]) - constraints[0, 0] = 1 - constraints[1, 1] = 1 - constraints[2, 2] = 1 - constraints[3, 2] = -1 - new_coef_names = ['Intercept', 'condition[T.1]', 'batch[1]', 'batch[2]'] - self.sim1.input_data.design_loc = dmat - self.sim1.input_data.design_scale = dmat - self.sim1.input_data._design_loc_names = new_coef_names - self.sim1.input_data._design_scale_names = new_coef_names - self.sim1.input_data.constraints_loc = constraints - self.sim1.input_data.constraints_scale = constraints - - def _test_full(self, sparse): - self._test_full_a_and_b(sparse=sparse) - self._test_full_a_only(sparse=sparse) - - def _test_batched(self, sparse): - self._test_batched_a_and_b(sparse=sparse) - self._test_batched_a_only(sparse=sparse) - - -class TestAccuracyVglmNb( - _TestAccuracyVglmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial distributed data. - """ - - def test_full_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmNb.test_full_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmNb.test_batched_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyVglmNorm( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normal distributed data. 
- # TODO not tested yet. - """ - - def test_full_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmNorm.test_full_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmNorm.test_batched_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyVglmBeta( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed data. - TODO not working yet. - """ - - def test_full_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmBeta.test_full_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyVglmBeta.test_batched_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_acc_glm_all.py b/batchglm/unit_test/test_acc_glm_all.py deleted file mode 100644 index 0dce6a43..00000000 --- a/batchglm/unit_test/test_acc_glm_all.py +++ /dev/null @@ -1,528 +0,0 @@ -import logging -import numpy as np -import scipy.sparse -import unittest - -import batchglm.api as glm - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _TestAccuracyGlmAllEstim: - - def __init__( - self, - simulator, - quick_scale, - noise_model, - sparse, - init_mode - ): - if noise_model is None: - raise ValueError("noise_model is None") - else: - if noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Estimator, InputDataGLM - elif noise_model == "norm": - from batchglm.api.models import Estimator, InputDataGLM - elif noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Estimator, InputDataGLM - else: - raise ValueError("noise_model not recognized") - - batch_size = 2000 - provide_optimizers = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, - "irls": noise_model in ["nb", "norm"], - "irls_gd": noise_model in ["nb", "norm"], - "irls_tr": noise_model in ["nb", "norm"], - "irls_gd_tr": noise_model in ["nb", "norm"] - } - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(simulator.input_data.x), - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale, - design_loc_names=simulator.input_data.design_loc_names, - design_scale_names=simulator.input_data.design_scale_names, - constraints_loc=simulator.input_data.constraints_loc, - constraints_scale=simulator.input_data.constraints_scale, - size_factors=simulator.input_data.size_factors, - as_dask=False - ) - else: - input_data = InputDataGLM( - data=simulator.input_data.x, - design_loc=simulator.input_data.design_loc, - 
design_scale=simulator.input_data.design_scale, - design_loc_names=simulator.input_data.design_loc_names, - design_scale_names=simulator.input_data.design_scale_names, - constraints_loc=simulator.input_data.constraints_loc, - constraints_scale=simulator.input_data.constraints_scale, - size_factors=simulator.input_data.size_factors, - as_dask=False - ) - - self.estimator = Estimator( - input_data=input_data, - batch_size=batch_size, - quick_scale=quick_scale, - provide_optimizers=provide_optimizers, - provide_batched=True, - provide_fim=noise_model in ["nb", "norm"], - provide_hessian=True, - init_a=init_mode, - init_b=init_mode - ) - self.sim = simulator - - def estimate( - self, - algo, - batched, - acc, - lr - ): - self.estimator.initialize() - self.estimator.train_sequence(training_strategy=[ - { - "learning_rate": lr, - "convergence_criteria": "all_converged", - "stopping_criteria": acc, - "use_batching": batched, - "optim_algo": algo, - }, - ]) - - def eval_estimation( - self, - batched, - train_loc, - train_scale - ): - if batched: - threshold_dev_a = 0.4 - threshold_dev_b = 0.4 - threshold_std_a = 2 - threshold_std_b = 2 - else: - threshold_dev_a = 0.2 - threshold_dev_b = 0.2 - threshold_std_a = 1 - threshold_std_b = 1 - - success = True - if train_loc: - mean_rel_dev_a = np.mean((self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) - std_rel_dev_a = np.std((self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) - - logging.getLogger("batchglm").info("mean_rel_dev_a %f" % mean_rel_dev_a) - logging.getLogger("batchglm").info("std_rel_dev_a %f" % std_rel_dev_a) - - if np.abs(mean_rel_dev_a) > threshold_dev_a or std_rel_dev_a > threshold_std_a: - success = False - if train_scale: - mean_rel_dev_b = np.mean((self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) - std_rel_dev_b = np.std((self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) - - logging.getLogger("batchglm").info("mean_rel_dev_b %f" % mean_rel_dev_b) - logging.getLogger("batchglm").info("std_rel_dev_b %f" % std_rel_dev_b) - - if np.abs(mean_rel_dev_b) > threshold_dev_b or std_rel_dev_b > threshold_std_b: - success = False - - return success - - -class _TestAccuracyGlmAll( - unittest.TestCase -): - """ - Test whether optimizers yield exact results. - - Accuracy is evaluted via deviation of simulated ground truth. - The unit tests test individual training graphs and multiple optimizers - (incl. one tensorflow internal optimizer and newton-rhapson) - for each training graph. The training graphs tested are as follows: - - - full data model - - train a and b model: test_full_global_a_and_b() - - train a model only: test_full_global_a_only() - - train b model only: test_full_global_b_only() - - batched data model - - train a and b model: test_batched_global_a_and_b() - - train a model only: test_batched_global_a_only() - - train b model only: test_batched_global_b_only() - - The unit tests throw an assertion error if the required accurcy is - not met. Accuracy thresholds are fairly lenient so that unit_tests - pass even with noise inherent in fast optimisation and random - initialisation in simulation. Still, large biases (i.e. graph errors) - should be discovered here. - - Note on settings by optimised: - - IRLS_TR: Needs slow TR collapse to converge. 
- """ - noise_model: str - optims_tested: dict - - def simulate(self): - self.simulate1() - self.simulate2() - - def get_simulator(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - return Simulator(num_observations=10000, num_features=10) - - def simulate1(self): - self.sim1 = self.get_simulator() - self.sim1.generate_sample_description(num_batches=2, num_conditions=2) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.7, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0, 0.15, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_scale(shape): - if self.noise_model in ["nb"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["norm"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0, 0.15, shape) - else: - raise ValueError("noise model not recognized") - return theta - - self.sim1.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim1.generate_data() - - def simulate2(self): - self.sim2 = self.get_simulator() - self.sim2.generate_sample_description(num_batches=0, num_conditions=2) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.9, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.ones(shape) - elif self.noise_model in ["beta"]: - theta = np.zeros(shape)+0.05 - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_scale(shape): - if self.noise_model in ["nb"]: - theta = np.ones(shape) - elif self.noise_model in ["norm"]: - theta = np.ones(shape) - elif self.noise_model in ["beta"]: - theta = np.ones(shape) - 0.8 - else: - raise ValueError("noise model not recognized") - return theta - - self.sim2.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim2.generate_data() - - def simulator(self, train_loc): - if train_loc: - return self.sim1 - else: - return self.sim2 - - def basic_test( - self, - batched, - train_loc, - train_scale, - sparse - ): - self.optims_tested = { - "nb": ["ADAM", "IRLS_GD_TR"], - "beta": ["NR_TR"], - "norm": ["IRLS_TR"] - } - if self.noise_model in ["norm"]: - algos = self.optims_tested["norm"] - init_mode = "all_zero" - lr = {"ADAM": 1e-3, "NR_TR": 1, "IRLS_TR": 1} - elif self.noise_model in ["beta"]: - algos = self.optims_tested["beta"] - init_mode = "all_zero" - if batched: - lr = {"ADAM": 0.1, "NR_TR": 1} - else: - lr = {"ADAM": 1e-5, "NR_TR": 1} 
- elif self.noise_model in ["nb"]: - algos = self.optims_tested["nb"] - init_mode = "standard" - if batched: - lr = {"ADAM": 0.1, "IRLS_GD_TR": 1} - else: - lr = {"ADAM": 0.05, "IRLS_GD_TR": 1} - else: - raise ValueError("noise model %s not recognized" % self.noise_model) - - for algo in algos: - logger.info("algorithm: %s" % algo) - if algo in ["ADAM", "RMSPROP", "GD"]: - if batched: - acc = 1e-4 - else: - acc = 1e-6 - glm.pkg_constants.JACOBIAN_MODE = "analytic" - elif algo in ["NR", "NR_TR"]: - if batched: - acc = 1e-12 - else: - acc = 1e-14 - if self.noise_model in ["beta"]: - glm.pkg_constants.TRUST_REGION_RADIUS_INIT = 1 - else: - glm.pkg_constants.TRUST_REGION_RADIUS_INIT = 100 - glm.pkg_constants.TRUST_REGION_T1 = 0.5 - glm.pkg_constants.TRUST_REGION_T2 = 1.5 - glm.pkg_constants.CHOLESKY_LSTSQS = True - glm.pkg_constants.CHOLESKY_LSTSQS_BATCHED = True - glm.pkg_constants.JACOBIAN_MODE = "analytic" - glm.pkg_constants.HESSIAN_MODE = "analytic" - elif algo in ["IRLS", "IRLS_TR", "IRLS_GD", "IRLS_GD_TR"]: - if batched: - acc = 1e-12 - else: - acc = 1e-14 - glm.pkg_constants.TRUST_REGION_T1 = 0.5 - glm.pkg_constants.TRUST_REGION_T2 = 1.5 - glm.pkg_constants.CHOLESKY_LSTSQS = True - glm.pkg_constants.CHOLESKY_LSTSQS_BATCHED = True - glm.pkg_constants.JACOBIAN_MODE = "analytic" - else: - return ValueError("algo %s not recognized" % algo) - estimator = _TestAccuracyGlmAllEstim( - simulator=self.simulator(train_loc=train_loc), - quick_scale=False if train_scale else True, - noise_model=self.noise_model, - sparse=sparse, - init_mode=init_mode - ) - estimator.estimate( - algo=algo, - batched=batched, - acc=acc, - lr=lr[algo] - ) - estimator.estimator.finalize() - success = estimator.eval_estimation( - batched=batched, - train_loc=train_loc, - train_scale=train_scale, - ) - assert success, "%s did not yield exact results" % algo - - return True - - def _test_full_a_and_b(self, sparse): - return self.basic_test( - batched=False, - train_loc=True, - train_scale=True, - sparse=sparse - ) - - def _test_full_a_only(self, sparse): - return self.basic_test( - batched=False, - train_loc=True, - train_scale=False, - sparse=sparse - ) - - def _test_full_b_only(self, sparse): - return self.basic_test( - batched=False, - train_loc=False, - train_scale=True, - sparse=sparse - ) - - def _test_batched_a_and_b(self, sparse): - return self.basic_test( - batched=True, - train_loc=True, - train_scale=True, - sparse=sparse - ) - - def _test_batched_a_only(self, sparse): - return self.basic_test( - batched=True, - train_loc=True, - train_scale=False, - sparse=sparse - ) - - def _test_batched_b_only(self, sparse): - return self.basic_test( - batched=True, - train_loc=False, - train_scale=True, - sparse=sparse - ) - - def _test_full(self, sparse): - self._test_full_a_and_b(sparse=sparse) - self._test_full_a_only(sparse=sparse) - self._test_full_b_only(sparse=sparse) - - def _test_batched(self, sparse): - self._test_batched_a_and_b(sparse=sparse) - self._test_batched_a_only(sparse=sparse) - self._test_batched_b_only(sparse=sparse) - - -class TestAccuracyGlmNb( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial distributed data. 
- """ - - def test_full_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNb.test_full_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNb.test_batched_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyGlmNorm( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normal distributed data. - """ - - def test_full_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNorm.test_full_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNorm.test_batched_norm()") - # TODO not working yet. - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyGlmBeta( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed data. - TODO not working yet. - """ - - def test_full_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmBeta.test_full_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmBeta.test_batched_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_acc_glm_all_tf2.py b/batchglm/unit_test/test_acc_glm_all_tf2.py deleted file mode 100644 index f4ab16fb..00000000 --- a/batchglm/unit_test/test_acc_glm_all_tf2.py +++ /dev/null @@ -1,524 +0,0 @@ -import logging -import numpy as np -import scipy.sparse -import unittest - -import batchglm.api as glm - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _TestAccuracyGlmAllEstim: - - def __init__( - self, - simulator, - quick_scale, - noise_model, - sparse, - init_mode - ): - if noise_model is None: - raise ValueError("noise_model is None") - else: - if noise_model == "nb": - from batchglm.api.models.glm_nb import Estimator, InputDataGLM - elif noise_model == "norm": - from batchglm.api.models.glm_norm import Estimator, InputDataGLM - elif noise_model == "beta": - from batchglm.api.models.glm_beta import Estimator, InputDataGLM - else: - raise ValueError("noise_model not recognized") - - batch_size = 2000 - provide_optimizers = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, 
- "irls": noise_model in ["nb", "norm"], - "irls_gd": noise_model in ["nb", "norm"], - "irls_tr": noise_model in ["nb", "norm"], - "irls_gd_tr": noise_model in ["nb", "norm"] - } - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(simulator.input_data.x), - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale, - constraints_loc=simulator.input_data.constraints_loc, - constraints_scale=simulator.input_data.constraints_scale, - size_factors=simulator.input_data.size_factors - ) - else: - input_data = InputDataGLM( - data=simulator.input_data.x, - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale, - constraints_loc=simulator.input_data.constraints_loc, - constraints_scale=simulator.input_data.constraints_scale, - size_factors=simulator.input_data.size_factors - ) - - self.estimator = Estimator( - input_data=input_data, - #batch_size=batch_size, - quick_scale=quick_scale, - #provide_optimizers=provide_optimizers, - #provide_batched=True, - #provide_fim=noise_model in ["nb", "norm"], - #provide_hessian=True, - init_a=init_mode, - init_b=init_mode - ) - self.sim = simulator - - def estimate( - self, - algo, - batched, - acc, - lr - ): - self.estimator.initialize() - self.estimator.train_sequence(training_strategy=[ - { - "learning_rate": lr, - "convergence_criteria": "all_converged", - "stopping_criteria": acc, - "use_batching": batched, - "optim_algo": algo, - "featurewise": False - }, - ]) - - def eval_estimation( - self, - batched, - train_loc, - train_scale - ): - if batched: - threshold_dev_a = 0.4 - threshold_dev_b = 0.4 - threshold_std_a = 2 - threshold_std_b = 2 - else: - threshold_dev_a = 0.2 - threshold_dev_b = 0.2 - threshold_std_a = 1 - threshold_std_b = 1 - - success = True - if train_loc: - mean_rel_dev_a = np.mean((self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) - std_rel_dev_a = np.std((self.estimator.model.a_var - self.sim.a_var) / self.sim.a_var) - - logging.getLogger("batchglm").info("mean_rel_dev_a %f" % mean_rel_dev_a) - logging.getLogger("batchglm").info("std_rel_dev_a %f" % std_rel_dev_a) - - if np.abs(mean_rel_dev_a) > threshold_dev_a or std_rel_dev_a > threshold_std_a: - success = False - if train_scale: - mean_rel_dev_b = np.mean((self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) - std_rel_dev_b = np.std((self.estimator.model.b_var - self.sim.b_var) / self.sim.b_var) - - logging.getLogger("batchglm").info("mean_rel_dev_b %f" % mean_rel_dev_b) - logging.getLogger("batchglm").info("std_rel_dev_b %f" % std_rel_dev_b) - - if np.abs(mean_rel_dev_b) > threshold_dev_b or std_rel_dev_b > threshold_std_b: - success = False - - return success - - -class _TestAccuracyGlmAll( - unittest.TestCase -): - """ - Test whether optimizers yield exact results. - - Accuracy is evaluted via deviation of simulated ground truth. - The unit tests test individual training graphs and multiple optimizers - (incl. one tensorflow internal optimizer and newton-rhapson) - for each training graph. The training graphs tested are as follows: - - - full data model - - train a and b model: test_full_global_a_and_b() - - train a model only: test_full_global_a_only() - - train b model only: test_full_global_b_only() - - batched data model - - train a and b model: test_batched_global_a_and_b() - - train a model only: test_batched_global_a_only() - - train b model only: test_batched_global_b_only() - - The unit tests throw an assertion error if the required accurcy is - not met. 
Accuracy thresholds are fairly lenient so that unit_tests - pass even with noise inherent in fast optimisation and random - initialisation in simulation. Still, large biases (i.e. graph errors) - should be discovered here. - - Note on settings by optimised: - - IRLS_TR: Needs slow TR collapse to converge. - """ - noise_model: str - optims_tested: dict - - def simulate(self): - self.simulate1() - self.simulate2() - - def get_simulator(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - return Simulator(num_observations=10000, num_features=10) - - def simulate1(self): - self.sim1 = self.get_simulator() - self.sim1.generate_sample_description(num_batches=2, num_conditions=2) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.7, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0, 0.15, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_scale(shape): - if self.noise_model in ["nb"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["norm"]: - theta = np.random.uniform(1, 3, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0, 0.15, shape) - else: - raise ValueError("noise model not recognized") - return theta - - self.sim1.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim1.generate_data() - - def simulate2(self): - self.sim2 = self.get_simulator() - self.sim2.generate_sample_description(num_batches=0, num_conditions=2) - - def rand_fn_ave(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.random.uniform(10, 1000, shape) - elif self.noise_model in ["beta"]: - theta = np.random.uniform(0.1, 0.9, shape) - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_loc(shape): - if self.noise_model in ["nb", "norm"]: - theta = np.ones(shape) - elif self.noise_model in ["beta"]: - theta = np.zeros(shape)+0.05 - else: - raise ValueError("noise model not recognized") - return theta - - def rand_fn_scale(shape): - if self.noise_model in ["nb"]: - theta = np.ones(shape) - elif self.noise_model in ["norm"]: - theta = np.ones(shape) - elif self.noise_model in ["beta"]: - theta = np.ones(shape) - 0.8 - else: - raise ValueError("noise model not recognized") - return theta - - self.sim2.generate_params( - rand_fn_ave=lambda shape: rand_fn_ave(shape), - rand_fn_loc=lambda shape: rand_fn_loc(shape), - rand_fn_scale=lambda shape: rand_fn_scale(shape) - ) - self.sim2.generate_data() - - def simulator(self, train_loc): - if train_loc: - return self.sim1 - else: - return self.sim2 - - def basic_test( - self, - batched, - train_loc, - train_scale, - sparse - ): - self.optims_tested = { - "nb": ["ADAM", "IRLS_GD_TR"], - "beta": ["NR_TR"], - "norm": ["IRLS_TR"] - } - if self.noise_model in 
["norm"]: - algos = self.optims_tested["norm"] - init_mode = "all_zero" - lr = {"ADAM": 1e-3, "NR_TR": 1, "IRLS_TR": 1} - elif self.noise_model in ["beta"]: - algos = self.optims_tested["beta"] - init_mode = "all_zero" - if batched: - lr = {"ADAM": 0.1, "NR_TR": 1} - else: - lr = {"ADAM": 1e-5, "NR_TR": 1} - elif self.noise_model in ["nb"]: - algos = self.optims_tested["nb"] - init_mode = "standard" - if batched: - lr = {"ADAM": 0.1, "IRLS_GD_TR": 1} - else: - lr = {"ADAM": 0.05, "IRLS_GD_TR": 1} - else: - raise ValueError("noise model %s not recognized" % self.noise_model) - - for algo in algos: - logger.info("algorithm: %s" % algo) - if algo in ["ADAM", "RMSPROP", "GD"]: - if batched: - acc = 1e-4 - else: - acc = 1e-6 - glm.pkg_constants.JACOBIAN_MODE = "analytic" - elif algo in ["NR", "NR_TR"]: - if batched: - acc = 1e-12 - else: - acc = 1e-14 - if self.noise_model in ["beta"]: - glm.pkg_constants.TRUST_REGION_RADIUS_INIT = 1 - else: - glm.pkg_constants.TRUST_REGION_RADIUS_INIT = 100 - glm.pkg_constants.TRUST_REGION_T1 = 0.5 - glm.pkg_constants.TRUST_REGION_T2 = 1.5 - glm.pkg_constants.CHOLESKY_LSTSQS = True - glm.pkg_constants.CHOLESKY_LSTSQS_BATCHED = True - glm.pkg_constants.JACOBIAN_MODE = "analytic" - glm.pkg_constants.HESSIAN_MODE = "analytic" - elif algo in ["IRLS", "IRLS_TR", "IRLS_GD", "IRLS_GD_TR"]: - if batched: - acc = 1e-12 - else: - acc = 1e-14 - glm.pkg_constants.TRUST_REGION_T1 = 0.5 - glm.pkg_constants.TRUST_REGION_T2 = 1.5 - glm.pkg_constants.CHOLESKY_LSTSQS = True - glm.pkg_constants.CHOLESKY_LSTSQS_BATCHED = True - glm.pkg_constants.JACOBIAN_MODE = "analytic" - else: - return ValueError("algo %s not recognized" % algo) - estimator = _TestAccuracyGlmAllEstim( - simulator=self.simulator(train_loc=train_loc), - quick_scale=False if train_scale else True, - noise_model=self.noise_model, - sparse=sparse, - init_mode=init_mode - ) - estimator.estimate( - algo=algo, - batched=batched, - acc=acc, - lr=lr[algo] - ) - estimator.estimator.finalize() - success = estimator.eval_estimation( - batched=batched, - train_loc=train_loc, - train_scale=train_scale, - ) - assert success, "%s did not yield exact results" % algo - - return True - - def _test_full_a_and_b(self, sparse): - return self.basic_test( - batched=False, - train_loc=True, - train_scale=True, - sparse=sparse - ) - - def _test_full_a_only(self, sparse): - return self.basic_test( - batched=False, - train_loc=True, - train_scale=False, - sparse=sparse - ) - - def _test_full_b_only(self, sparse): - return self.basic_test( - batched=False, - train_loc=False, - train_scale=True, - sparse=sparse - ) - - def _test_batched_a_and_b(self, sparse): - return self.basic_test( - batched=True, - train_loc=True, - train_scale=True, - sparse=sparse - ) - - def _test_batched_a_only(self, sparse): - return self.basic_test( - batched=True, - train_loc=True, - train_scale=False, - sparse=sparse - ) - - def _test_batched_b_only(self, sparse): - return self.basic_test( - batched=True, - train_loc=False, - train_scale=True, - sparse=sparse - ) - - def _test_full(self, sparse): - self._test_full_a_and_b(sparse=sparse) - self._test_full_a_only(sparse=sparse) - self._test_full_b_only(sparse=sparse) - - def _test_batched(self, sparse): - self._test_batched_a_and_b(sparse=sparse) - self._test_batched_a_only(sparse=sparse) - self._test_batched_b_only(sparse=sparse) - - -class TestAccuracyGlmNb( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial distributed data. 
- """ - - def test_full_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNb.test_full_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - -""" - def test_batched_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNb.test_batched_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) -""" -""" -class TestAccuracyGlmNorm( - _TestAccuracyGlmAll, - unittest.TestCase -): - - Test whether optimizers yield exact results for normal distributed data. - - - def test_full_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNorm.test_full_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNorm.test_batched_norm()") - # TODO not working yet. - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyGlmBeta( - _TestAccuracyGlmAll, - unittest.TestCase -): - - Test whether optimizers yield exact results for beta distributed data. - TODO not working yet. - - - def test_full_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmBeta.test_full_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmBeta.test_batched_beta()") - - np.random.seed(1) - self.noise_model = "beta" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) -""" - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_acc_sizefactors_glm_all.py b/batchglm/unit_test/test_acc_sizefactors_glm_all.py deleted file mode 100644 index e4bfb814..00000000 --- a/batchglm/unit_test/test_acc_sizefactors_glm_all.py +++ /dev/null @@ -1,103 +0,0 @@ -import logging -import numpy as np -import unittest - -import batchglm.api as glm -from batchglm.unit_test.test_acc_glm_all import _TestAccuracyGlmAll - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _TestAccuracyGlmAllSf(_TestAccuracyGlmAll): - - def simulate(self): - super().simulate() - # Add size factors into input data: Do not centre at 1 so that they bias MAD if something is off. 
- self.sim1.input_data.size_factors = np.random.uniform(1.5, 2., size=self.sim1.input_data.num_observations) - - def _test_full(self, sparse): - self._test_full_a_and_b(sparse=sparse) - - def _test_batched(self, sparse): - self._test_batched_a_and_b(sparse=sparse) - - -class TestAccuracyGlmNbSf( - _TestAccuracyGlmAllSf, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial distributed data. - """ - - def test_full_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNbSf.test_full_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNbSf.test_batched_nb()") - - np.random.seed(1) - self.noise_model = "nb" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyGlmNormSf( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normal distributed data. - # TODO not tested yet. - """ - - def test_full_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNormSf.test_full_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestAccuracyGlmNormSf.test_batched_norm()") - - np.random.seed(1) - self.noise_model = "norm" - self.simulate() - self._test_batched(sparse=False) - self._test_batched(sparse=True) - - -class TestAccuracyGlmBetaSf( - _TestAccuracyGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed data. - Note: size factors are note implemented for beta distribution. 
- """ - - def test_dummy(self): - return True - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_hessians_glm_all.py b/batchglm/unit_test/test_hessians_glm_all.py deleted file mode 100644 index 1d0dbf36..00000000 --- a/batchglm/unit_test/test_hessians_glm_all.py +++ /dev/null @@ -1,187 +0,0 @@ -import logging -import unittest -import time -import numpy as np -import scipy.sparse - -import batchglm.data as data_utils -import batchglm.pkg_constants as pkg_constants - -from batchglm.models.base_glm import InputDataGLM - - -class Test_Hessians_GLM_ALL(unittest.TestCase): - noise_model: str - - def setUp(self): - pass - - def tearDown(self): - pass - - def simulate(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - num_observations = 500 - sim = Simulator(num_observations=num_observations, num_features=4) - sim.generate_sample_description(num_conditions=2, num_batches=2) - sim.generate() - - self.sim = sim - - def get_hessians( - self, - input_data: InputDataGLM - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Estimator - elif self.noise_model == "norm": - from batchglm.api.models import Estimator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Estimator - else: - raise ValueError("noise_model not recognized") - - provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": False, "nr_tr": False, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - - estimator = Estimator( - input_data=input_data, - quick_scale=False, - provide_optimizers=provide_optimizers, - provide_fim=False, - provide_hessian=False, - init_a="standard", - init_b="standard" - ) - estimator.initialize() - estimator_store = estimator.finalize() - - return - estimator_store.fisher_inv - - def _test_compute_hessians(self, sparse): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - from batchglm.api.models.tf1.glm_nb import Simulator, InputDataGLM - elif self.noise_model == "norm": - from batchglm.api.models import Simulator, InputDataGLM - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator, InputDataGLM - else: - raise ValueError("noise_model not recognized") - - num_observations = 500 - num_conditions = 2 - - sim = Simulator(num_observations=num_observations, num_features=4) - sim.generate_sample_description(num_conditions=num_conditions, num_batches=2) - sim.generate() - - sample_description = data_utils.sample_description_from_xarray(sim.data, dim="observations") - design_loc = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") - design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition") - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(sim.X), - design_loc=design_loc, - design_scale=design_scale - ) - else: - input_data = InputDataGLM( - data=sim.X, - design_loc=design_loc, - design_scale=design_scale - ) - - # Compute hessian based on analytic solution. 
- pkg_constants.HESSIAN_MODE = "analytic" - t0_analytic = time.time() - h_analytic = self.get_hessians(input_data) - t1_analytic = time.time() - t_analytic = t1_analytic - t0_analytic - - # Compute hessian based on tensorflow auto-differentiation. - pkg_constants.HESSIAN_MODE = "tf1" - t0_tf = time.time() - h_tf = self.get_hessians(input_data) - t1_tf = time.time() - t_tf = t1_tf - t0_tf - - logging.getLogger("batchglm").info("run time observation batch-wise analytic solution: %f" % t_analytic) - logging.getLogger("batchglm").info("run time tensorflow solution: %f" % t_tf) - logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((h_tf - h_analytic)))) - - #i = 1 - #print(h_tf[i, :, :]) - #print(h_analytic[i, :, :]) - #print(h_tf[i, :, :] - h_analytic[i, :, :]) - - # Make sure that hessians are not all zero which might make evaluation of equality difficult. - assert np.sum(np.abs(h_analytic)) > 1e-10, \ - "hessians too small to perform test: %f" % np.sum(np.abs(h_analytic)) - mad = np.max(np.abs(h_tf - h_analytic)) - assert mad < 1e-15, mad - return True - - -class Test_Hessians_GLM_NB(Test_Hessians_GLM_ALL, unittest.TestCase): - - def test_compute_hessians_nb(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_NB.test_compute_hessians_nb()") - - self.noise_model = "nb" - self._test_compute_hessians(sparse=False) - #self._test_compute_hessians(sparse=False) # TODO tf1>=1.13 waiting for tf1.sparse.expand_dims to work - - return True - - -class Test_Hessians_GLM_NORM(Test_Hessians_GLM_ALL, unittest.TestCase): - - def test_compute_hessians_norm(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_NORM.test_compute_hessians_norm()") - - self.noise_model = "norm" - self._test_compute_hessians(sparse=False) - #self._test_compute_hessians(sparse=False) # TODO tf1>=1.13 waiting for tf1.sparse.expand_dims to work - - return True - - -class Test_Hessians_GLM_BETA(Test_Hessians_GLM_ALL, unittest.TestCase): - - def test_compute_hessians_beta(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_BETA.test_compute_hessians_beta()") - - self.noise_model = "beta" - self._test_compute_hessians(sparse=False) - #self._test_compute_hessians(sparse=False) # TODO tf1>=1.13 waiting for tf1.sparse.expand_dims to work - - return True - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_jacobians_glm_all.py b/batchglm/unit_test/test_jacobians_glm_all.py deleted file mode 100644 index 1605b19a..00000000 --- a/batchglm/unit_test/test_jacobians_glm_all.py +++ /dev/null @@ -1,192 +0,0 @@ -import logging -import unittest -import time -import numpy as np -import scipy.sparse - -import batchglm.data as data_utils -import batchglm.pkg_constants as pkg_constants - -from batchglm.models.base_glm import InputDataGLM - - -class Test_Jacobians_GLM_ALL(unittest.TestCase): - noise_model: str - - def setUp(self): - pass - - def tearDown(self): - pass - - def simulate(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models import Simulator - 
elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - num_observations = 500 - sim = Simulator(num_observations=num_observations, num_features=4) - sim.generate_sample_description(num_conditions=2, num_batches=2) - sim.generate() - - self.sim = sim - - def get_jacs( - self, - input_data: InputDataGLM - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Estimator - elif self.noise_model == "norm": - from batchglm.api.models import Estimator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Estimator - else: - raise ValueError("noise_model not recognized") - - provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": False, "nr_tr": False, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - - estimator = Estimator( - input_data=input_data, - quick_scale=False, - provide_optimizers=provide_optimizers, - provide_fim=False, - provide_hessian=False, - init_a="standard", - init_b="standard" - ) - estimator.initialize() - # Do not train, evaluate at initialization! - estimator.train_sequence(training_strategy=[ - { - "convergence_criteria": "step", - "stopping_criteria": 0, - "use_batching": False, - "optim_algo": "gd", - "train_mu": False, - "train_r": False - }, - ]) - estimator_store = estimator.finalize() - return estimator_store.gradients.values - - def compare_jacs( - self, - design, - sparse - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - from batchglm.api.models.tf1.glm_nb import InputDataGLM - elif self.noise_model == "norm": - from batchglm.api.models import InputDataGLM - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import InputDataGLM - else: - raise ValueError("noise_model not recognized") - - sample_description = data_utils.sample_description_from_xarray(self.sim.data, dim="observations") - design_loc = data_utils.design_matrix(sample_description, formula=design) - design_scale = data_utils.design_matrix(sample_description, formula=design) - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(self.sim.X), - design_loc=design_loc, - design_scale=design_scale - ) - else: - input_data = InputDataGLM( - data=self.sim.X, - design_loc=design_loc, - design_scale=design_scale - ) - - logging.getLogger("batchglm").debug("** Running analytic Jacobian test") - pkg_constants.JACOBIAN_MODE = "analytic" - t0_analytic = time.time() - J_analytic = self.get_jacs(input_data) - t1_analytic = time.time() - t_analytic = t1_analytic - t0_analytic - - logging.getLogger("batchglm").debug("** Running tensorflow Jacobian test") - pkg_constants.JACOBIAN_MODE = "tf1" - t0_tf = time.time() - J_tf = self.get_jacs(input_data) - t1_tf = time.time() - t_tf = t1_tf - t0_tf - - # Make sure that jacobians are not all zero which might make evaluation of equality difficult. 
- assert np.sum(np.abs(J_analytic)) > 1e-10, \ - "jacobians too small to perform test: %f" % np.sum(np.abs(J_analytic)) - - logging.getLogger("batchglm").info("run time tensorflow solution: %f" % t_tf) - logging.getLogger("batchglm").info("run time observation batch-wise analytic solution: %f" % t_analytic) - logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((J_tf - J_analytic)))) - logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs((J_tf - J_analytic) / J_tf))) - - #print(J_tf) - #print(J_analytic) - #print((J_tf - J_analytic) / J_tf) - - mrad = np.max(np.abs((J_tf - J_analytic) / J_tf)) - assert mrad < 1e-12, mrad - return True - - def _test_compute_jacobians(self, sparse): - self.simulate() - self.compare_jacs(design="~ 1 + condition + batch", sparse=sparse) - - -class Test_Jacobians_GLM_NB(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_NB.test_compute_jacobians_nb()") - - self.noise_model = "nb" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seems to work here yet. - - -class Test_Jacobians_GLM_NORM(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_NORM.test_compute_jacobians_norm()") - - self.noise_model = "norm" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. - -class Test_Jacobians_GLM_BETA(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_BETA.test_compute_jacobians_beta()") - - self.noise_model = "beta" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. 
- - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_jacobians_glm_all_tf2.py b/batchglm/unit_test/test_jacobians_glm_all_tf2.py deleted file mode 100644 index 5bb329dd..00000000 --- a/batchglm/unit_test/test_jacobians_glm_all_tf2.py +++ /dev/null @@ -1,186 +0,0 @@ -import logging -import unittest -import time -import numpy as np -import scipy.sparse - -import batchglm.api as glm -import batchglm.data as data_utils -import batchglm.pkg_constants as pkg_constants - -from batchglm.models.base_glm import InputDataGLM - - -class Test_Jacobians_GLM_ALL(unittest.TestCase): - noise_model: str - - def setUp(self): - pass - - def tearDown(self): - pass - - def simulate(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models.glm_norm import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - num_observations = 500 - sim = Simulator(num_observations=num_observations, num_features=4) - sim.generate_sample_description(num_conditions=2, num_batches=2) - sim.generate() - - self.sim = sim - - def get_jacs( - self, - input_data: InputDataGLM - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.glm_nb import Estimator - elif self.noise_model == "norm": - from batchglm.api.models.glm_norm import Estimator - elif self.noise_model == "beta": - from batchglm.api.models.glm_beta import Estimator - else: - raise ValueError("noise_model not recognized") - - estimator = Estimator( - input_data=input_data, - init_a=self.sim.a_var, - init_b=self.sim.b_var - ) - estimator.initialize() - # Do not train, evaluate at initialization! 
- estimator.train_sequence(training_strategy=[ - { - "convergence_criteria": "step", - "stopping_criteria": 1, - "use_batching": False, - "optim_algo": "gd", - "train_mu": True, - "train_r": True, - "autograd": pkg_constants.JACOBIAN_MODE == "tf" - }, - ]) - estimator.finalize() - return estimator.jacobian - - def compare_jacs( - self, - design, - sparse - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - from batchglm.api.models.glm_nb import InputDataGLM - elif self.noise_model == "norm": - from batchglm.api.models.glm_norm import InputDataGLM - elif self.noise_model == "beta": - from batchglm.api.models.glm_beta import InputDataGLM - else: - raise ValueError("noise_model not recognized") - - sample_description = self.sim.sample_description - design_loc = data_utils.design_matrix(sample_description, formula=design) - design_scale = data_utils.design_matrix(sample_description, formula=design) - - if sparse: - input_data = InputDataGLM( - data=scipy.sparse.csr_matrix(self.sim.x), - design_loc=design_loc, - design_scale=design_scale - ) - else: - input_data = InputDataGLM( - data=self.sim.x, - design_loc=design_loc, - design_scale=design_scale - ) - - logging.getLogger("batchglm").debug("** Running analytic Jacobian test") - pkg_constants.JACOBIAN_MODE = "analytic" - t0_analytic = time.time() - J_analytic = self.get_jacs(input_data) - t1_analytic = time.time() - t_analytic = t1_analytic - t0_analytic - - logging.getLogger("batchglm").debug("** Running tensorflow Jacobian test") - pkg_constants.JACOBIAN_MODE = "tf" - t0_tf = time.time() - J_tf = self.get_jacs(input_data) - t1_tf = time.time() - t_tf = t1_tf - t0_tf - - # Make sure that jacobians are not all zero which might make evaluation of equality difficult. - assert np.sum(np.abs(J_analytic)) > 1e-10, \ - "jacobians too small to perform test: %f" % np.sum(np.abs(J_analytic)) - - logging.getLogger("batchglm").info("run time tensorflow solution: %f" % t_tf) - logging.getLogger("batchglm").info("run time observation batch-wise analytic solution: %f" % t_analytic) - logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((J_tf - J_analytic)))) - logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs((J_tf - J_analytic) / J_tf))) - - #print(J_tf) - #print(J_analytic) - #print((J_tf - J_analytic) / J_tf) - - mrad = np.max(np.abs((J_tf - J_analytic) / J_tf)) - assert mrad < 1e-10, mrad # changed 1e-12 to 1e-10 - return True - - def _test_compute_jacobians(self, sparse): - self.simulate() - self.compare_jacs(design="~ 1 + condition + batch", sparse=sparse) - - -class Test_Jacobians_GLM_NB(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_nb(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_NB.test_compute_jacobians_nb()") - - self.noise_model = "nb" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seems to work here yet. 
- -""" -class Test_Jacobians_GLM_NORM(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_norm(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_NORM.test_compute_jacobians_norm()") - - self.noise_model = "norm" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. - -class Test_Jacobians_GLM_BETA(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_beta(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_BETA.test_compute_jacobians_beta()") - - self.noise_model = "beta" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. -""" - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/test_simulators_glm_all.py b/batchglm/unit_test/test_simulators_glm_all.py deleted file mode 100644 index 306fbcd1..00000000 --- a/batchglm/unit_test/test_simulators_glm_all.py +++ /dev/null @@ -1,128 +0,0 @@ -import logging -import unittest -import numpy as np - -import batchglm.api as glm -from batchglm.models.base_glm import _SimulatorGLM, InputDataGLM - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class TestSimulationGlmAll: - - sim: _SimulatorGLM - input_data: InputDataGLM - noise_model: str - - def eval_simulation_mean( - self - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "norm": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - else: - raise ValueError("noise_model not recognized") - - means_sim = self.sim.a_var[0, :] - means_obs = self.sim.link_loc(np.mean(self.sim.input_data.x, axis=0)) - mean_dev = np.mean(means_sim - means_obs) - std_dev = np.std(means_sim - means_obs) - - logging.getLogger("batchglm").info("mean_dev_a %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_a %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - def _test_all_moments(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - from batchglm.api.models.tf1.glm_nb import Simulator - elif self.noise_model == "norm": - from batchglm.api.models import Simulator - elif self.noise_model == "beta": - from batchglm.api.models.tf1.glm_beta import Simulator - else: - raise ValueError("noise_model not recognized") - - self.sim = Simulator( - num_observations=100000, - num_features=10 - ) - self.sim.generate_sample_description(num_batches=1, num_conditions=1) - self.sim.generate_params() - self.sim.generate_data() - - success = self.eval_simulation_mean() - assert success, "mean of simulation was inaccurate" - return True - - -class TestSimulationGlmNb( - TestSimulationGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial data. 
- """ - - def test(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestSimulationGlmNb.test()") - - self.noise_model = "nb" - self._test_all_moments() - - -class TestSimulationGlmNorm( - TestSimulationGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normally distributed data. - """ - - def test(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestSimulationGlmNorm.test()") - - self.noise_model = "norm" - self._test_all_moments() - - -class TestSimulationGlmBeta( - TestSimulationGlmAll, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed data. - """ - - def test(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("TestSimulationGlmBeta.test()") - - self.noise_model = "beta" - self._test_all_moments() - - -if __name__ == '__main__': - unittest.main() From 8296133e2e4e98d66e4c2b6177c0f353544a4761 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 27 Jan 2022 18:37:16 +0100 Subject: [PATCH 2/4] Remove TF mentions --- README.md | 6 +++--- batchglm/pkg_constants.py | 1 - setup.py | 4 ---- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 34e95b65..95d70bdc 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Fast and scalable fitting of over-determined generalized-linear models (GLMs) -batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis for single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered. batchglm is based on TensorFlow - +batchglm was developed in the context of [diffxpy](https://github.com/theislab/diffxpy) to allow fast model fitting for differential expression analysis for single-cell RNA-seq data. However, one can use batchglm or its concepts in other scenarios where over-determined GLMs are encountered. 
+ diff --git a/batchglm/pkg_constants.py b/batchglm/pkg_constants.py index 9afb32bf..ed9749c8 100644 --- a/batchglm/pkg_constants.py +++ b/batchglm/pkg_constants.py @@ -28,7 +28,6 @@ GTOL_BY_FEATURE_SCALE = 1e-8 try: - import tensorflow as tf TF_NUM_THREADS = int(os.environ.get('TF_NUM_THREADS', 0)) TF_LOOP_PARALLEL_ITERATIONS = int(os.environ.get('TF_LOOP_PARALLEL_ITERATIONS', 10)) diff --git a/setup.py b/setup.py index 1397bde7..ef2dd5ab 100644 --- a/setup.py +++ b/setup.py @@ -28,10 +28,6 @@ 'dask' ], extras_require={ - 'optional': [ - 'tensorflow>=1.14.0', - 'tensorflow-gpu>=1.14.0' - ], 'plotting_deps': [ "matplotlib", "seaborn" From 70a08d92bca3aa4082d0b5a9e04fe8da39e75378 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 27 Jan 2022 18:38:42 +0100 Subject: [PATCH 3/4] Remove TF2 --- batchglm/api/models/__init__.py | 4 ---- batchglm/api/models/tf2/__init__.py | 3 --- batchglm/api/models/tf2/glm_beta.py | 2 -- batchglm/api/models/tf2/glm_nb.py | 2 -- batchglm/api/models/tf2/glm_norm.py | 2 -- 5 files changed, 13 deletions(-) delete mode 100644 batchglm/api/models/tf2/__init__.py delete mode 100644 batchglm/api/models/tf2/glm_beta.py delete mode 100644 batchglm/api/models/tf2/glm_nb.py delete mode 100644 batchglm/api/models/tf2/glm_norm.py diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index eff3c3f2..ca70d778 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,5 +1 @@ from . import numpy -try: - from . import tf2 -except ImportError: - tf2 = None diff --git a/batchglm/api/models/tf2/__init__.py b/batchglm/api/models/tf2/__init__.py deleted file mode 100644 index 8fbdb228..00000000 --- a/batchglm/api/models/tf2/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import glm_beta -from . import glm_nb -from . import glm_norm diff --git a/batchglm/api/models/tf2/glm_beta.py b/batchglm/api/models/tf2/glm_beta.py deleted file mode 100644 index 8b5f563e..00000000 --- a/batchglm/api/models/tf2/glm_beta.py +++ /dev/null @@ -1,2 +0,0 @@ -#from batchglm.models.glm_beta import InputDataGLM, Model, Simulator -#from batchglm.train.tf2.glm_beta import Estimator diff --git a/batchglm/api/models/tf2/glm_nb.py b/batchglm/api/models/tf2/glm_nb.py deleted file mode 100644 index 8e2ba7a9..00000000 --- a/batchglm/api/models/tf2/glm_nb.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.glm_nb import InputDataGLM, Model, Simulator -from batchglm.train.tf2.glm_nb import Estimator diff --git a/batchglm/api/models/tf2/glm_norm.py b/batchglm/api/models/tf2/glm_norm.py deleted file mode 100644 index 45fc0453..00000000 --- a/batchglm/api/models/tf2/glm_norm.py +++ /dev/null @@ -1,2 +0,0 @@ -#from batchglm.models.glm_norm import InputDataGLM, Model, Simulator -#from batchglm.train.tf2.glm_norm import Estimator From 12e75cd84ab5063ddffa866c46b5078e11d7b7a4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 27 Jan 2022 18:56:32 +0100 Subject: [PATCH 4/4] Remove constants. 
--- batchglm/pkg_constants.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/batchglm/pkg_constants.py b/batchglm/pkg_constants.py index ed9749c8..eefd624c 100644 --- a/batchglm/pkg_constants.py +++ b/batchglm/pkg_constants.py @@ -25,24 +25,4 @@ XTOL_BY_FEATURE_LOC = 1e-8 XTOL_BY_FEATURE_SCALE = 1e-6 GTOL_BY_FEATURE_LOC = 1e-8 -GTOL_BY_FEATURE_SCALE = 1e-8 - -try: - - TF_NUM_THREADS = int(os.environ.get('TF_NUM_THREADS', 0)) - TF_LOOP_PARALLEL_ITERATIONS = int(os.environ.get('TF_LOOP_PARALLEL_ITERATIONS', 10)) - - TF_CONFIG_PROTO = tf.compat.v1.ConfigProto() - TF_CONFIG_PROTO.allow_soft_placement = True - TF_CONFIG_PROTO.log_device_placement = False - TF_CONFIG_PROTO.gpu_options.allow_growth = True - TF_CONFIG_PROTO.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 - - TF_CONFIG_PROTO.inter_op_parallelism_threads = TF_NUM_THREADS - TF_CONFIG_PROTO.intra_op_parallelism_threads = TF_NUM_THREADS - - if TF_NUM_THREADS == 0: - TF_NUM_THREADS = multiprocessing.cpu_count() - -except ImportError: - tf = None +GTOL_BY_FEATURE_SCALE = 1e-8 \ No newline at end of file