diff --git a/spherecluster/von_mises_fisher_mixture.py b/spherecluster/von_mises_fisher_mixture.py
index 53c4c67..7206f5d 100644
--- a/spherecluster/von_mises_fisher_mixture.py
+++ b/spherecluster/von_mises_fisher_mixture.py
@@ -5,6 +5,8 @@
 from joblib import Parallel, delayed
 from numpy import i0 # modified Bessel function of first kind order 0, I_0
 from scipy.special import iv # modified Bessel function of first kind, I_v
+from scipy.special import i0e # exp-scaled modified Bessel function of first kind order 0, exp(-|x|) * I_0(x)
+from scipy.special import ive # exp-scaled modified Bessel function of first kind, exp(-|x|) * I_v(x)
 from scipy.special import logsumexp
 
 from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
@@ -56,11 +58,31 @@ def _vmf_log(X, kappa, mu):
     """Computs log(vMF(X, kappa, mu)) using built-in numpy/scipy Bessel
     approximations.
 
-    Works well on small kappa and mu.
+    Works well on small kappa and mu. (See the comment below about working in log scale.)
     """
     n_examples, n_features = X.shape
-    return np.log(_vmf_normalize(kappa, n_features) * np.exp(kappa * X.dot(mu).T))
+
+    # --- compute the log directly to avoid overflow, so this also works with large kappa and mu
+    log_density = kappa * X.dot(mu).T
+    log_norm = _vmf_normalize_log(kappa, n_features)
+
+    return log_norm + log_density
+
+
+def _vmf_normalize_log(kappa, dim):
+    """Compute the normalization constant in log scale, using built-in exp-scaled numpy/scipy Bessel functions."""
+    log_num = (dim / 2. - 1.) * np.log(kappa)
+    if dim / 2. - 1. < 1e-15:
+        log_denom = dim / 2. * np.log(2. * np.pi) + np.log(i0e(kappa)) + kappa
+    else:
+        log_denom = dim / 2. * np.log(2. * np.pi) + np.log(ive(dim / 2. - 1., kappa)) + kappa
+
+    if np.isinf(log_num):
+        raise ValueError("VMF scaling numerator was inf.")
+
+    if np.isinf(log_denom):
+        raise ValueError("VMF scaling denominator was inf.")
+    return log_num - log_denom
 
 
 def _vmf_normalize(kappa, dim):
     """Compute normalization constant using built-in numpy/scipy Bessel
@@ -68,12 +90,12 @@ def _vmf_normalize(kappa, dim):
 
     Works well on small kappa and mu.
     """
-    num = np.power(kappa, dim / 2.0 - 1.0)
+    num = np.power(kappa, dim / 2. - 1.)
 
-    if dim / 2.0 - 1.0 < 1e-15:
-        denom = np.power(2.0 * np.pi, dim / 2.0) * i0(kappa)
+    if dim / 2. - 1. < 1e-15:
+        denom = np.power(2. * np.pi, dim / 2.) * i0(kappa)
     else:
-        denom = np.power(2.0 * np.pi, dim / 2.0) * iv(dim / 2.0 - 1.0, kappa)
+        denom = np.power(2. * np.pi, dim / 2.) * iv(dim / 2. - 1., kappa)
 
     if np.isinf(num):
         raise ValueError("VMF scaling numerator was inf.")
@@ -95,9 +117,9 @@ def _log_H_asymptotic(nu, kappa):
     from https://cran.r-project.org/web/packages/movMF/index.html
     """
     beta = np.sqrt((nu + 0.5) ** 2)
-    kappa_l = np.min([kappa, np.sqrt((3.0 * nu + 11.0 / 2.0) * (nu + 3.0 / 2.0))])
+    kappa_l = np.min([kappa, np.sqrt((3. * nu + 11. / 2.) * (nu + 3. / 2.))])
     return _S(kappa, nu + 0.5, beta) + (
-        _S(kappa_l, nu, nu + 2.0) - _S(kappa_l, nu + 0.5, beta)
+        _S(kappa_l, nu, nu + 2.) - _S(kappa_l, nu + 0.5, beta)
     )
 
 
@@ -110,9 +132,9 @@ def _S(kappa, alpha, beta):
     See "S <-" in movMF.R and utility function implementation notes from
     https://cran.r-project.org/web/packages/movMF/index.html
     """
-    kappa = 1.0 * np.abs(kappa)
-    alpha = 1.0 * alpha
-    beta = 1.0 * np.abs(beta)
+    kappa = 1. * np.abs(kappa)
+    alpha = 1. * alpha
+    beta = 1. * np.abs(beta)
     a_plus_b = alpha + beta
     u = np.sqrt(kappa ** 2 + beta ** 2)
     if alpha == 0:
@@ -137,7 +159,7 @@ def _vmf_log_asymptotic(X, kappa, mu):
     https://cran.r-project.org/web/packages/movMF/index.html
     """
     n_examples, n_features = X.shape
-    log_vfm = kappa * X.dot(mu).T + -_log_H_asymptotic(n_features / 2.0 - 1.0, kappa)
+    log_vfm = kappa * X.dot(mu).T + -_log_H_asymptotic(n_features / 2. - 1., kappa)
 
     return log_vfm
 
@@ -341,11 +363,11 @@ def _maximization(X, posterior, force_weights=None):
 
         # update concentration (kappa) [TODO: add other kappa approximations]
         rbar = center_norm / (n_examples * weights[cc])
-        concentrations[cc] = rbar * n_features - np.power(rbar, 3.0)
+        concentrations[cc] = rbar * n_features - np.power(rbar, 3.)
         if np.abs(rbar - 1.0) < 1e-10:
             concentrations[cc] = MAX_CONTENTRATION
         else:
-            concentrations[cc] /= 1.0 - np.power(rbar, 2.0)
+            concentrations[cc] /= 1. - np.power(rbar, 2.)
 
         # let python know we can free this (good for large dense X)
         del X_scaled
@@ -784,13 +806,13 @@ def _check_fit_data(self, X):
             else:
                 n = np.linalg.norm(X[ee, :])
 
-            if np.abs(n - 1.0) > 1e-4:
+            if np.abs(n - 1.) > 1e-4:
                 raise ValueError("Data l2-norm must be 1, found {}".format(n))
 
         return X
 
     def _check_test_data(self, X):
-        X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES)
+        X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, warn_on_dtype=True)
         n_samples, n_features = X.shape
         expected_n_features = self.cluster_centers_.shape[1]
         if not n_features == expected_n_features:
@@ -805,7 +827,7 @@
             else:
                 n = np.linalg.norm(X[ee, :])
 
-            if np.abs(n - 1.0) > 1e-4:
+            if np.abs(n - 1.) > 1e-4:
                 raise ValueError("Data l2-norm must be 1, found {}".format(n))
 
         return X
@@ -884,7 +906,7 @@ def transform(self, X, y=None):
         if self.normalize:
             X = normalize(X)
 
-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
 
         X = self._check_test_data(X)
         return self._transform(X)
@@ -913,7 +935,7 @@ def predict(self, X):
         if self.normalize:
             X = normalize(X)
 
-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
 
         X = self._check_test_data(X)
         return _labels_inertia(X, self.cluster_centers_)[0]
@@ -934,12 +956,12 @@ def score(self, X, y=None):
         if self.normalize:
             X = normalize(X)
 
-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
 
         X = self._check_test_data(X)
         return -_labels_inertia(X, self.cluster_centers_)[1]
 
     def log_likelihood(self, X):
-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
         return _log_likelihood(
             X, self.cluster_centers_, self.weights_, self.concentrations_
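
Reviewer note on the overflow fix: `scipy.special.ive(v, x)` returns `iv(v, x) * exp(-|x|)`, so for `kappa >= 0` we have `log(iv(v, kappa)) = log(ive(v, kappa)) + kappa`; that identity is where the trailing `+ kappa` in `log_denom` comes from. A minimal sketch of why `_vmf_normalize_log` is needed (the `dim` and `kappa` values below are arbitrary illustrative choices, not from the patch):

```python
import numpy as np
from scipy.special import iv, ive

dim, kappa = 300, 5000.0  # hypothetical high-dimensional, high-concentration case
v = dim / 2. - 1.

# Old path (_vmf_normalize): both factors overflow float64 for large kappa/dim,
# so the function raises "VMF scaling numerator/denominator was inf."
num = np.power(kappa, v)                               # inf
denom = np.power(2. * np.pi, dim / 2.) * iv(v, kappa)  # inf

# New path (_vmf_normalize_log): the same quantities in log scale stay finite,
# using log(iv(v, kappa)) == log(ive(v, kappa)) + kappa.
log_num = v * np.log(kappa)
log_denom = dim / 2. * np.log(2. * np.pi) + np.log(ive(v, kappa)) + kappa
print(log_num - log_denom)  # finite log normalization constant
```

Without the `+ kappa` correction the exp-scaled Bessel would silently rescale the density; with it, the log-scale path agrees exactly with `_vmf_normalize` wherever the latter does not overflow.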
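For context on the `_maximization` hunk (only the float-literal style changes there): the concentration update is the closed-form approximation `kappa_hat = (rbar * d - rbar**3) / (1 - rbar**2)` from Banerjee et al. (2005), with the `MAX_CONTENTRATION` clamp covering the `rbar -> 1` singularity. A quick numeric check, with hypothetical values:

```python
# Closed-form kappa approximation used in _maximization (Banerjee et al., 2005):
#     kappa_hat = (rbar * n_features - rbar**3) / (1 - rbar**2)
# rbar is the mean resultant length of a cluster; rbar -> 1 means a very tight cluster.
n_features = 300  # hypothetical dimensionality
for rbar in (0.5, 0.9, 0.99):
    kappa_hat = (rbar * n_features - rbar ** 3) / (1 - rbar ** 2)
    print(rbar, kappa_hat)  # grows rapidly toward rbar = 1, hence the MAX_CONTENTRATION guard
```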