Fix typos in arviz codebase (#1700)
* Fix typos

* Undo changes in label guide

Co-authored-by: Oriol Abril-Pla <oriol.abril.pla@gmail.com>
hectormz and OriolAbril authored May 21, 2021
1 parent 1a39374 commit 7ebedd2
Showing 31 changed files with 46 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -169,7 +169,7 @@

## v0.9.0 (2020 June 23)
### New features
- * loo-pit plot. The kde is computed over the data interval (this could be shorter than [0, 1]). The HDI is computed analitically ([1215](https://github.com/arviz-devs/arviz/pull/1215))
+ * loo-pit plot. The kde is computed over the data interval (this could be shorter than [0, 1]). The HDI is computed analytically ([1215](https://github.com/arviz-devs/arviz/pull/1215))
* Added `html_repr` of InferenceData objects for jupyter notebooks. ([1217](https://github.com/arviz-devs/arviz/pull/1217))
* Added support for PyJAGS via the function `from_pyjags`. ([1219](https://github.com/arviz-devs/arviz/pull/1219) and [1245](https://github.com/arviz-devs/arviz/pull/1245))
* `from_pymc3` can now retrieve `coords` and `dims` from model context ([1228](https://github.com/arviz-devs/arviz/pull/1228), [1240](https://github.com/arviz-devs/arviz/pull/1240) and [1249](https://github.com/arviz-devs/arviz/pull/1249))
2 changes: 1 addition & 1 deletion arviz/data/datasets.py
@@ -204,7 +204,7 @@ def load_arviz_data(dataset=None, data_home=None):
Run with no parameters to get a list of all available models.
- The directory to save to can also be set with the environement
+ The directory to save to can also be set with the environment
variable `ARVIZ_HOME`. The checksum of the dataset is checked against a
hardcoded value to watch for data corruption.
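For readers skimming the diff, a minimal usage sketch of the documented loader (the dataset name `"centered_eight"` is one of ArviZ's bundled examples; the exact cache location is an assumption):

```python
import arviz as az

# Calling with no arguments returns the registry of available datasets.
available = az.load_arviz_data()

# Named datasets are downloaded once, checksum-verified, and cached
# (by default under the ArviZ data home, overridable via ARVIZ_HOME).
idata = az.load_arviz_data("centered_eight")
print(idata)
```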
2 changes: 1 addition & 1 deletion arviz/data/inference_data.py
@@ -1874,7 +1874,7 @@ def concat(*args, dim=None, copy=True, inplace=False, reset_dim=True):
msg = "Mismatch between the groups."
raise TypeError(msg)
for group in arg._groups_all:
- # handle data groups seperately
+ # handle data groups separately
if group not in ["observed_data", "constant_data", "predictions_constant_data"]:
# assert that groups are equal
if group not in arg0_groups:
4 changes: 2 additions & 2 deletions arviz/data/io_emcee.py
@@ -51,8 +51,8 @@ def _verify_names(sampler, var_names, arg_names, slices):
num_vars = ndim
else:
num_vars = len(slices)
- indexs = utils.arange(ndim)
- slicing_try = np.concatenate([utils.one_de(indexs[idx]) for idx in slices])
+ indices = utils.arange(ndim)
+ slicing_try = np.concatenate([utils.one_de(indices[idx]) for idx in slices])
if len(set(slicing_try)) != ndim:
warnings.warn(
"Check slices: Not all parameters in chain captured. "
2 changes: 1 addition & 1 deletion arviz/data/io_numpyro.py
@@ -53,7 +53,7 @@ def __init__(
Dictionary containing constant data variables mapped to their values.
predictions_constant_data: dict
Constant data used for out-of-sample predictions.
- index_origin : int, optinal
+ index_origin : int, optional
coords : dict[str] -> list[str]
Map of dimensions to coordinates
dims : dict[str] -> list[str]
2 changes: 1 addition & 1 deletion arviz/data/io_pystan.py
@@ -884,7 +884,7 @@ def get_attrs_stan3(fit, model=None):
def infer_dtypes(fit, model=None):
"""Infer dtypes from Stan model code.
- Function strips out generated quantities block and searchs for `int`
+ Function strips out generated quantities block and searches for `int`
dtypes after stripping out comments inside the block.
"""
if model is None:
2 changes: 1 addition & 1 deletion arviz/plots/backends/bokeh/jointplot.py
@@ -71,7 +71,7 @@ def plot_joint(
ax_hist_x, _ = ax[0]
axjoin, ax_hist_y = ax[1]
else:
raise ValueError("ax must be of lenght 3 but found {}".format(len(ax)))
raise ValueError("ax must be of length 3 but found {}".format(len(ax)))

# Set labels for axes
x_var_name = make_label(plotters[0][0], plotters[0][1])
2 changes: 1 addition & 1 deletion arviz/plots/backends/matplotlib/jointplot.py
@@ -63,7 +63,7 @@ def plot_joint(
elif len(ax) == 3:
axjoin, ax_hist_x, ax_hist_y = ax
else:
raise ValueError("ax must be of lenght 3 but found {}".format(len(ax)))
raise ValueError("ax must be of length 3 but found {}".format(len(ax)))

# Personalize axes
ax_hist_x.tick_params(labelleft=False, labelbottom=False)
2 changes: 1 addition & 1 deletion arviz/plots/forestplot.py
@@ -83,7 +83,7 @@ def plot_forest(
list with valid matplotlib colors, one color per model. Alternative a string can be passed.
If the string is `cycle`, it will automatically chose a color per model from the matplotlibs
cycle. If a single color is passed, eg 'k', 'C2', 'red' this color will be used for all
- models. Defauls to 'cycle'.
+ models. Defaults to 'cycle'.
textsize: float
Text size scaling factor for labels, titles and lines. If None it will be autoscaled based
on figsize.
2 changes: 1 addition & 1 deletion arviz/plots/jointplot.py
@@ -124,7 +124,7 @@ def plot_joint(
>>> kind='kde',
>>> figsize=(6, 6))
- Overlayed plots:
+ Overlaid plots:
.. plot::
:context: close-figs
2 changes: 1 addition & 1 deletion arviz/plots/kdeplot.py
@@ -201,7 +201,7 @@ def plot_kde(
>>> az.plot_kde(mu_posterior, values2=tau_posterior)
- Plot 2d contour KDE, without filling and countour lines using viridis cmap
+ Plot 2d contour KDE, without filling and contour lines using viridis cmap
.. plot::
:context: close-figs
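The body of the folded `.. plot::` directive is not shown in the diff; a sketch of what such an example could look like (`contour` and `contour_kwargs` are real `plot_kde` parameters, but the snippet below is a reconstruction, not the file's content):

```python
import numpy as np
import arviz as az

mu_posterior = np.random.randn(5000)
tau_posterior = np.random.randn(5000)

# 2D KDE with contour lines colored by the viridis colormap.
az.plot_kde(
    mu_posterior,
    values2=tau_posterior,
    contour=True,
    contour_kwargs={"cmap": "viridis"},
)
```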
2 changes: 1 addition & 1 deletion arviz/plots/khatplot.py
@@ -40,7 +40,7 @@ def plot_khat(
Parameters
----------
- khats : ELPDData cointaining Pareto shapes information or array of
+ khats : ELPDData containing Pareto shapes information or array of
Pareto tail indices.
color : str or array_like, optional
Colors of the scatter plot, if color is a str all dots will
4 changes: 2 additions & 2 deletions arviz/plots/plot_utils.py
@@ -263,7 +263,7 @@ def set_xticklabels(ax, coord_labels):


def filter_plotters_list(plotters, plot_kind):
"""Cut list of plotters so that it is at most of lenght "plot.max_subplots"."""
"""Cut list of plotters so that it is at most of length "plot.max_subplots"."""
max_plots = rcParams["plot.max_subplots"]
max_plots = len(plotters) if max_plots is None else max_plots
if len(plotters) > max_plots:
@@ -455,7 +455,7 @@ def set_bokeh_circular_ticks_labels(ax, hist, labels):


def compute_ranks(ary):
"""Compute ranks for continuos and discrete variables."""
"""Compute ranks for continuous and discrete variables."""
if ary.dtype.kind == "i":
ary_shape = ary.shape
ary = ary.flatten()
4 changes: 2 additions & 2 deletions arviz/plots/separationplot.py
@@ -50,7 +50,7 @@ def plot_separation(
textsize: int, optional
Text size for labels. If None it will be autoscaled based on figsize.
color : str, optional
- Color to assign to the postive class. The negative class will be plotted using the
+ Color to assign to the positive class. The negative class will be plotted using the
same color and an `alpha=0.3` transparency.
legend : bool, optional
Show the legend of the figure.
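For context, a hypothetical call matching this docstring (the `classification10d` dataset and the variable name `outcome` are assumptions borrowed from ArviZ's example data):

```python
import arviz as az

idata = az.load_arviz_data("classification10d")
# One color for the positive class; negatives reuse it at alpha=0.3.
az.plot_separation(idata=idata, y="outcome", y_hat="outcome", figsize=(8, 1))
```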
@@ -126,7 +126,7 @@ def plot_separation(

if len(y) != len(y_hat):
warnings.warn(
"y and y_hat must be the same lenght",
"y and y_hat must be the same length",
UserWarning,
)

4 changes: 2 additions & 2 deletions arviz/plots/traceplot.py
@@ -82,13 +82,13 @@ def plot_trace(
compact: bool, optional
Plot multidimensional variables in a single plot.
compact_prop: str or dict {str: array_like}, optional
- Tuple containing the property name and the property values to distinguish diferent
+ Tuple containing the property name and the property values to distinguish different
dimensions with compact=True
combined: bool, optional
Flag for combining multiple chains into a single line. If False (default), chains will be
plotted separately.
chain_prop: str or dict {str: array_like}, optional
- Tuple containing the property name and the property values to distinguish diferent chains
+ Tuple containing the property name and the property values to distinguish different chains
legend: bool, optional
Add a legend to the figure with the chain color code.
plot_kwargs, fill_kwargs, rug_kwargs, hist_kwargs: dict, optional
2 changes: 1 addition & 1 deletion arviz/rcparams.py
@@ -531,7 +531,7 @@ class rc_context: # pylint: disable=invalid-name
rc : dict, optional
Mapping containing the rcParams to modify temporally.
fname : str, optional
- Filename of the file containig the rcParams to use inside the rc_context.
+ Filename of the file containing the rcParams to use inside the rc_context.
Examples
--------
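A minimal sketch of the pattern the docstring describes (assuming an `idata` object already in scope; `plot.max_subplots` is a real rcParam that appears elsewhere in this diff):

```python
import arviz as az

# The override applies only inside the with-block; rcParams are then restored.
with az.rc_context(rc={"plot.max_subplots": 80}):
    az.plot_trace(idata)
```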
4 changes: 2 additions & 2 deletions arviz/stats/density_utils.py
@@ -599,7 +599,7 @@ def _kde_linear(
pdf : Numpy array for the density estimates.
bw: optional, the estimated bandwidth.
"""
- # Check `x` is from appropiate type
+ # Check `x` is from appropriate type
try:
x = _check_type(x)
except ValueError as e:
@@ -935,7 +935,7 @@ def get_bins(values):
Notes
-----
- Computes the width of the bins by taking the maximun of the Sturges and the Freedman-Diaconis
+ Computes the width of the bins by taking the maximum of the Sturges and the Freedman-Diaconis
estimators. According to numpy `np.histogram` this provides good all around performance.
The Sturges is a very simplistic estimator based on the assumption of normality of the data.
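A sketch of the rule the docstring states, not ArviZ's exact implementation (edge handling is simplified):

```python
import numpy as np

def get_bins_sketch(values):
    """Bin edges from the max of the Sturges and Freedman-Diaconis bin counts."""
    values = np.asarray(values)
    n = values.size
    x_min, x_max = values.min(), values.max()
    bins_sturges = int(np.ceil(np.log2(n))) + 1          # Sturges: ceil(log2 n) + 1
    iqr = np.subtract(*np.percentile(values, [75, 25]))  # Freedman-Diaconis width:
    width_fd = 2 * iqr / n ** (1 / 3)                    # 2 * IQR / n^(1/3)
    bins_fd = int(np.ceil((x_max - x_min) / width_fd)) if width_fd > 0 else 1
    # The larger bin count corresponds to the narrower of the two bin widths.
    return np.linspace(x_min, x_max, max(bins_sturges, bins_fd) + 1)

edges = get_bins_sketch(np.random.randn(500))
```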
10 changes: 5 additions & 5 deletions arviz/stats/stats.py
@@ -1147,7 +1147,7 @@ def summary(
if index_origin is not None:
warnings.warn(
"index_origin has been deprecated. summary now shows coordinate values, "
"to change the label shown, modify the coordinate values before calling sumary",
"to change the label shown, modify the coordinate values before calling summary",
DeprecationWarning,
)
index_origin = rcParams["data.index_origin"]
@@ -1326,13 +1326,13 @@ def summary(
summary_df = pd.DataFrame(
(np.full((cast(int, n_vars), n_metrics), np.nan)), columns=metric_names
)
- indexs = []
+ indices = []
for i, (var_name, sel, isel, values) in enumerate(
xarray_var_iter(joined, skip_dims={"metric"})
):
summary_df.iloc[i] = values
- indexs.append(labeller.make_label_flat(var_name, sel, isel))
- summary_df.index = indexs
+ indices.append(labeller.make_label_flat(var_name, sel, isel))
+ summary_df.index = indices
elif fmt.lower() == "long":
df = joined.to_dataframe().reset_index().set_index("metric")
df.index = list(df.index)
@@ -1706,7 +1706,7 @@ def apply_test_function(
kwargs passed to :func:`~arviz.wrap_xarray_ufunc`. By default, some suitable input_core_dims
are used.
inplace: bool, optional
- If True, add the variables inplace, othewise, return a copy of idata with the variables
+ If True, add the variables inplace, otherwise, return a copy of idata with the variables
added.
overwrite: bool, optional
Overwrite data in case ``out_name_data`` or ``out_name_pp`` are already variables in
4 changes: 2 additions & 2 deletions arviz/tests/base_tests/test_diagnostics.py
@@ -302,7 +302,7 @@ def test_effective_sample_size_nan(self, method, relative, chain, draw, use_nan)
else:
ess_value = ess(data, method=method, relative=relative)
assert not np.isnan(ess_value)
- # test following only once tests are runned
+ # test following only once tests are run
if (method == "bulk") and (not relative) and (chain is None) and (draw == 4):
if use_nan:
assert np.isnan(_ess(data))
@@ -417,7 +417,7 @@ def test_mcse_bad_method(self, data, method):
@pytest.mark.parametrize("draws", (3, 4, 100))
@pytest.mark.parametrize("chains", (None, 1, 2))
def test_multichain_summary_array(self, draws, chains):
"""Test multichain statistics against invidual functions."""
"""Test multichain statistics against individual functions."""
if chains is None:
ary = np.random.randn(draws)
else:
2 changes: 1 addition & 1 deletion arviz/tests/base_tests/test_plots_bokeh.py
@@ -450,7 +450,7 @@ def test_plot_ess_evolution(models):


def test_plot_ess_bad_kind(models):
"""Test error when plot_ess recieves an invalid kind."""
"""Test error when plot_ess receives an invalid kind."""
idata = models.model_1
with pytest.raises(ValueError, match="Invalid kind"):
plot_ess(idata, kind="bad kind", backend="bokeh", show=False)
2 changes: 1 addition & 1 deletion arviz/tests/base_tests/test_plots_matplotlib.py
@@ -1291,7 +1291,7 @@ def test_plot_ess_evolution(models):


def test_plot_ess_bad_kind(models):
"""Test error when plot_ess recieves an invalid kind."""
"""Test error when plot_ess receives an invalid kind."""
idata = models.model_1
with pytest.raises(ValueError, match="Invalid kind"):
plot_ess(idata, kind="bad kind")
2 changes: 1 addition & 1 deletion arviz/tests/base_tests/test_stats.py
@@ -639,7 +639,7 @@ def test_loo_pit_bad_input_type(centered_eight, arg):

@pytest.mark.parametrize("incompatibility", ["y-y_hat1", "y-y_hat2", "y_hat-log_weights"])
def test_loo_pit_bad_input_shape(incompatibility):
"""Test shape incompatiblities."""
"""Test shape incompatibilities."""
y = np.random.random(8)
y_hat = np.random.random((8, 200))
log_weights = np.random.random((8, 200))
2 changes: 1 addition & 1 deletion arviz/tests/base_tests/test_stats_numba.py
@@ -45,6 +45,6 @@ def test_numba_stats():
Numba.enable_numba()
with_numba = r2_score(set_1, set_2)
with_numba_one_dimensional = r2_score(set_3, set_4)
- assert state == Numba.numba_flag # Ensure that inital state = final state
+ assert state == Numba.numba_flag # Ensure that initial state = final state
assert np.allclose(non_numba, with_numba)
assert np.allclose(non_numba_one_dimensional, with_numba_one_dimensional)
4 changes: 2 additions & 2 deletions arviz/tests/helpers.py
@@ -204,15 +204,15 @@ def check_multiple_attrs(
It is thought to first check if the parent object contains a given dataset,
and then (if present) check the attributes of the dataset.
- Given the ouput of the function, all missmatches between expectation and reality can
+ Given the output of the function, all mismatches between expectation and reality can
be retrieved: a single string indicates a group mismatch and a tuple of strings
``(group, var)`` indicates a mismatch in the variable ``var`` of ``group``.
Parameters
----------
test_dict: dict of {str : list of str}
Its structure should be `{dataset1_name: [var1, var2], dataset2_name: [var]}`.
- A ``~`` at the beggining of a dataset or variable name indicates the name NOT
+ A ``~`` at the beginning of a dataset or variable name indicates the name NOT
being present must be asserted.
parent: InferenceData
InferenceData object on which to check the attributes.
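A hypothetical `test_dict` illustrating the `~` convention (group and variable names are invented; `idata` is assumed in scope):

```python
test_dict = {
    "posterior": ["mu", "tau"],   # group must exist and contain these variables
    "~prior": [],                 # this group must be absent
    "observed_data": ["obs", "~y_missing"],  # obs present, y_missing absent
}
failed = check_multiple_attrs(test_dict, idata)
assert not failed  # an empty return means every expectation held
```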
2 changes: 1 addition & 1 deletion doc/source/contributing/developer_guide.rst
@@ -25,7 +25,7 @@ keyword argument defaulting and plot behavior
The convenience function ``get_plotting_function`` available in
``arviz.plots.get_plotting_function`` should be called to obtain
the correct plotting function from the associated backend. If
- adding a new backend follow the pattern provided to programatically
+ adding a new backend follow the pattern provided to programmatically
call the correct backend
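A schematic of that dispatch pattern (the `(plot_name, plot_module, backend)` signature is taken from existing ArviZ plots; the function and module names below are placeholders):

```python
from arviz.plots import get_plotting_function

def plot_example(data, backend=None, **kwargs):
    # Resolve the backend-specific implementation (matplotlib or bokeh)
    # and forward the already-validated keyword arguments to it.
    plot = get_plotting_function("plot_example", "exampleplot", backend)
    return plot(data=data, **kwargs)
```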

Test Separation
2 changes: 1 addition & 1 deletion doc/source/getting_started/XarrayforArviZ.ipynb
@@ -22,7 +22,7 @@
"\n",
"\n",
"## Why more than one data structure?\n",
"Bayesian Inference generates numerous datasets that represent different aspects of the model. For example in a single analysis a Bayesian practioner could end up with any of the following data.\n",
"Bayesian Inference generates numerous datasets that represent different aspects of the model. For example in a single analysis a Bayesian practitioner could end up with any of the following data.\n",
"\n",
"\n",
"\n",
4 changes: 2 additions & 2 deletions doc/source/user_guide/numpyro_refitting.ipynb
@@ -2619,11 +2619,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We initialize our sampling wrapper. Let's stop and analize each of the arguments. \n",
"We initialize our sampling wrapper. Let's stop and analyze each of the arguments. \n",
"\n",
"We use `idata_orig` as a starting point, and mostly as a source of observed and constant data which is then subsetted in `sel_observations`.\n",
"\n",
"We also use `model` to get automatic log likelihood computation and we have the option to set the `rng_key`. Even if the data for each fit is different the `rng_key` is splitted with every fit.\n",
"We also use `model` to get automatic log likelihood computation and we have the option to set the `rng_key`. Even if the data for each fit is different the `rng_key` is split with every fit.\n",
"\n",
"Finally, `sample_kwargs` and `idata_kwargs` are used to make sure all refits and corresponding InferenceData are generated with the same properties."
]
4 changes: 2 additions & 2 deletions doc/source/user_guide/numpyro_refitting_xr_lik.ipynb
@@ -2092,7 +2092,7 @@
"source": [
"We are now missing the `log_likelihood` group because we have not used the `log_likelihood` argument in `idata_kwargs`. We are doing this to ease the job of the sampling wrapper. Instead of going out of our way to get Stan to calculate the pointwise log likelihood values for each refit and for the excluded observation at every refit, we will compromise and manually write a function to calculate the pointwise log likelihood.\n",
"\n",
"Even though it is not ideal to lose part of the straight out of the box capabilities of PyStan-ArviZ integration, this should generally not be a problem. We are basically moving the pointwise log likelihood calculation from the Stan code to the Python code, in both cases we need to manyally write the function to calculate the pointwise log likelihood.\n",
"Even though it is not ideal to lose part of the straight out of the box capabilities of PyStan-ArviZ integration, this should generally not be a problem. We are basically moving the pointwise log likelihood calculation from the Stan code to the Python code, in both cases we need to manually write the function to calculate the pointwise log likelihood.\n",
"\n",
"Moreover, the Python computation could even be written to be compatible with Dask. Thus it will work even in cases where the large number of observations makes it impossible to store pointwise log likelihood values (with shape `n_samples * n_observations`) in memory."
]
@@ -4630,7 +4630,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We initialize our sampling wrapper. Let's stop and analize each of the arguments. \n",
"We initialize our sampling wrapper. Let's stop and analyze each of the arguments. \n",
"\n",
"We then use the `log_lik_fun` and `posterior_vars` argument to tell the wrapper how to call `xr.apply_ufunc`. `log_lik_fun` is the function to be called, which is then called with the following positional arguments:\n",
"\n",
2 changes: 1 addition & 1 deletion doc/source/user_guide/pymc3_refitting_xr_lik.ipynb
@@ -4840,7 +4840,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We initialize our sampling wrapper. Let's stop and analize each of the arguments. \n",
"We initialize our sampling wrapper. Let's stop and analyze each of the arguments. \n",
"\n",
"We'd generally use `model` to pass a model object of some kind, already compiled and reexecutable, however, as we saw before, we need to recompile the model every time we use it to pass the model generating function instead. Close enough.\n",
"\n",
4 changes: 2 additions & 2 deletions doc/source/user_guide/pystan2_refitting.ipynb
@@ -14,7 +14,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Below there is one example of `SamplingWrapper` usage for PyStan exteding {class}`arviz.PyStan2SamplingWrapper` which already implements some default methods targetted to PyStan.\n",
"Below there is one example of `SamplingWrapper` usage for PyStan exteding {class}`arviz.PyStan2SamplingWrapper` which already implements some default methods targeted to PyStan.\n",
"\n",
"Before starting, it is important to note that PyStan cannot call the C++ functions it uses. Therefore, the **code** of the model must be slightly modified in order to be compatible with the cross validation refitting functions."
]
@@ -153,7 +153,7 @@
" y_hat[i] = normal_rng(b0 + b1 * x[i], sigma_e);\n",
" }\n",
" for (j in 1:N_ex) {\n",
" // calculate the log likelihood of the exluded data given data_for_fitting\n",
" // calculate the log likelihood of the excluded data given data_for_fitting\n",
" log_lik_ex[j] = normal_lpdf(y_ex[j] | b0 + b1 * x_ex[j], sigma_e);\n",
" }\n",
"}\n",