From 7dba19bf2a57cb49a4a32c22a8c236c6317fecba Mon Sep 17 00:00:00 2001 From: Steve Bachmeier <23350991+stevebachmeier@users.noreply.github.com> Date: Mon, 19 Aug 2024 12:32:37 -0600 Subject: [PATCH 1/8] update results management system docstrings (#464) * update docstrings * pin sphinx-rtd-theme>=0.6 * various type hint updates * change Stratification __call__() method stratify() * move shared custom types to a new types.py module --- src/vivarium/framework/results/context.py | 2 +- src/vivarium/framework/results/interface.py | 2 +- src/vivarium/framework/results/manager.py | 2 +- src/vivarium/framework/results/observation.py | 2 +- .../framework/results/stratification.py | 23 ++++++++++++++++++- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/vivarium/framework/results/context.py b/src/vivarium/framework/results/context.py index f7d23d0a5..317867dbd 100644 --- a/src/vivarium/framework/results/context.py +++ b/src/vivarium/framework/results/context.py @@ -5,7 +5,7 @@ """ from collections import defaultdict -from typing import Callable, Generator, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Generator, List, Optional, Tuple, Type, Union import pandas as pd from pandas.core.groupby.generic import DataFrameGroupBy diff --git a/src/vivarium/framework/results/interface.py b/src/vivarium/framework/results/interface.py index 25eb9264d..7ed8979b6 100644 --- a/src/vivarium/framework/results/interface.py +++ b/src/vivarium/framework/results/interface.py @@ -8,7 +8,7 @@ to a simulation. 
""" -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union import pandas as pd diff --git a/src/vivarium/framework/results/manager.py b/src/vivarium/framework/results/manager.py index 25807899c..ae052744e 100644 --- a/src/vivarium/framework/results/manager.py +++ b/src/vivarium/framework/results/manager.py @@ -6,7 +6,7 @@ from collections import defaultdict from enum import Enum -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import pandas as pd diff --git a/src/vivarium/framework/results/observation.py b/src/vivarium/framework/results/observation.py index d31062167..6b1fb8255 100644 --- a/src/vivarium/framework/results/observation.py +++ b/src/vivarium/framework/results/observation.py @@ -75,7 +75,7 @@ def observe( df: Union[pd.DataFrame, DataFrameGroupBy], stratifications: Optional[tuple[str, ...]], ) -> Optional[pd.DataFrame]: - # """Determine whether to observe the given event and, if so, gather the results.""" + """Determine whether to observe the given event and, if so, gather the results.""" if not self.to_observe(event): return None else: diff --git a/src/vivarium/framework/results/stratification.py b/src/vivarium/framework/results/stratification.py index 765a047ac..c9ec7e279 100644 --- a/src/vivarium/framework/results/stratification.py +++ b/src/vivarium/framework/results/stratification.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass -from typing import Callable, List, Optional, Union +from typing import Any, Callable, List, Optional, Union import pandas as pd from pandas.api.types import CategoricalDtype @@ -24,6 +24,27 @@ class Stratification: This class includes a :meth:`stratify ` method that produces an output column by calling the mapper on the source columns. + + Attributes + ---------- + name + Name of the stratification. 
+ sources + A list of the columns and values needed as input for the `mapper`. + categories + Exhaustive list of all possible stratification values. + excluded_categories + List of possible stratification values to exclude from results processing. + If None (the default), will use exclusions as defined in the configuration. + mapper + A callable that maps the columns and value pipelines specified by the + `requires_columns` and `requires_values` arguments to the stratification + categories. It can either map the entire population or an individual + simulant. A simulation will fail if the `mapper` ever produces an invalid + value. + is_vectorized + True if the `mapper` function will map the entire population, and False + if it will only map a single simulant. """ name: str From 7fb20a417677a01283e91f51325c75fefc8b686c Mon Sep 17 00:00:00 2001 From: Steve Bachmeier <23350991+stevebachmeier@users.noreply.github.com> Date: Mon, 19 Aug 2024 14:29:31 -0600 Subject: [PATCH 2/8] Fixes for doc building; minor docstring updates (#467) --- src/vivarium/framework/results/context.py | 2 +- src/vivarium/framework/results/interface.py | 2 +- src/vivarium/framework/results/manager.py | 2 +- src/vivarium/framework/results/observation.py | 2 +- .../framework/results/stratification.py | 23 +------------------ 5 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/vivarium/framework/results/context.py b/src/vivarium/framework/results/context.py index 317867dbd..f7d23d0a5 100644 --- a/src/vivarium/framework/results/context.py +++ b/src/vivarium/framework/results/context.py @@ -5,7 +5,7 @@ """ from collections import defaultdict -from typing import Any, Callable, Generator, List, Optional, Tuple, Type, Union +from typing import Callable, Generator, List, Optional, Tuple, Type, Union import pandas as pd from pandas.core.groupby.generic import DataFrameGroupBy diff --git a/src/vivarium/framework/results/interface.py b/src/vivarium/framework/results/interface.py index 
7ed8979b6..25eb9264d 100644 --- a/src/vivarium/framework/results/interface.py +++ b/src/vivarium/framework/results/interface.py @@ -8,7 +8,7 @@ to a simulation. """ -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union import pandas as pd diff --git a/src/vivarium/framework/results/manager.py b/src/vivarium/framework/results/manager.py index ae052744e..25807899c 100644 --- a/src/vivarium/framework/results/manager.py +++ b/src/vivarium/framework/results/manager.py @@ -6,7 +6,7 @@ from collections import defaultdict from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import pandas as pd diff --git a/src/vivarium/framework/results/observation.py b/src/vivarium/framework/results/observation.py index 6b1fb8255..d31062167 100644 --- a/src/vivarium/framework/results/observation.py +++ b/src/vivarium/framework/results/observation.py @@ -75,7 +75,7 @@ def observe( df: Union[pd.DataFrame, DataFrameGroupBy], stratifications: Optional[tuple[str, ...]], ) -> Optional[pd.DataFrame]: - """Determine whether to observe the given event and, if so, gather the results.""" + # """Determine whether to observe the given event and, if so, gather the results.""" if not self.to_observe(event): return None else: diff --git a/src/vivarium/framework/results/stratification.py b/src/vivarium/framework/results/stratification.py index c9ec7e279..765a047ac 100644 --- a/src/vivarium/framework/results/stratification.py +++ b/src/vivarium/framework/results/stratification.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass -from typing import Any, Callable, List, Optional, Union +from typing import Callable, List, Optional, Union import pandas as pd from pandas.api.types import CategoricalDtype @@ -24,27 +24,6 @@ class Stratification: This class includes a 
:meth:`stratify ` method that produces an output column by calling the mapper on the source columns. - - Attributes - ---------- - name - Name of the stratification. - sources - A list of the columns and values needed as input for the `mapper`. - categories - Exhaustive list of all possible stratification values. - excluded_categories - List of possible stratification values to exclude from results processing. - If None (the default), will use exclusions as defined in the configuration. - mapper - A callable that maps the columns and value pipelines specified by the - `requires_columns` and `requires_values` arguments to the stratification - categories. It can either map the entire population or an individual - simulant. A simulation will fail if the `mapper` ever produces an invalid - value. - is_vectorized - True if the `mapper` function will map the entire population, and False - if it will only map a single simulant. """ name: str From 827f2111976b1a83eb7e45cebae528d8b7af72e5 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 20 Aug 2024 16:08:18 -0700 Subject: [PATCH 3/8] Add results concept doc --- docs/source/concepts/lifecycle.rst | 5 +- .../concepts/model_specification/index.rst | 33 ++ docs/source/concepts/results.rst | 360 +++++++++++++++++- docs/source/concepts/time.rst | 8 - src/vivarium/framework/results/context.py | 2 +- src/vivarium/framework/results/interface.py | 8 +- src/vivarium/framework/results/manager.py | 2 +- src/vivarium/framework/results/observation.py | 10 +- .../framework/results/stratification.py | 6 +- 9 files changed, 407 insertions(+), 27 deletions(-) diff --git a/docs/source/concepts/lifecycle.rst b/docs/source/concepts/lifecycle.rst index 3b3d601a2..0cb271947 100644 --- a/docs/source/concepts/lifecycle.rst +++ b/docs/source/concepts/lifecycle.rst @@ -34,7 +34,8 @@ during a simulation. * - | :ref:`Main Loop ` - | The core logic (as encoded in the simulation components) is executed. 
* - | :ref:`Simulation End ` - - | The population state is finalized and results are tabulated. + - | The population state is finalized and results are tabulated and written + | to disk. The simulation itself maintains a formal representation of its internal execution state using the tools in the :mod:`~vivarium.framework.lifecycle` @@ -197,4 +198,4 @@ simulation end. It is split into two states. During the first, the signal that the event loop has finished and the :ref:`state table ` is final. At this point, final simulation outputs are safe to compute. The second state is *report* in -which the simulation will accumulate all final outputs and return them. +which the simulation will accumulate all final outputs and write them to disk. diff --git a/docs/source/concepts/model_specification/index.rst b/docs/source/concepts/model_specification/index.rst index 2f4536780..16efb98a2 100644 --- a/docs/source/concepts/model_specification/index.rst +++ b/docs/source/concepts/model_specification/index.rst @@ -24,6 +24,11 @@ You can find a short intro to yaml basics :local: :backlinks: none +The Plugins Block +----------------- + +TBD + The Components Block -------------------- The components block of the model specification file contains the information @@ -89,3 +94,31 @@ and a diarrhea disease model. :hidden: yaml_basics + +The Configuration Block +----------------------- + +The configuration block of the model specification file contains any information +necessary to configure the simulation to run, including (among other things) +key columns to be used for common random number generation, the simulation +start and end times, step size, and the population size. + +.. 
code-block:: yaml + + configuration: + randomness: + key_columns: ['entrance_time', 'age'] + time: + start: + year: 2022 + month: 1 + day: 1 + end: + year: 2026 + month: 12 + day: 31 + step_size: 0.5 # Days + population: + population_size: 100_000 + age_start: 0 + age_end: 5 diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index 990e78f04..1e39f09fa 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -1,9 +1,363 @@ .. _results_concept: ================== -Simulation Results +Results Management ================== -.. todo:: +.. contents:: + :depth: 2 + :local: + :backlinks: none - Everything here. \ No newline at end of file +The results management system is responsible for collecting, formatting, and storing +results of a ``vivarium`` simulation. The results are then formatted and written +to disk at the end of the simulation during the :ref:`simulation end phase `. + +Main Concepts +------------- + +There are three main concepts that make up the results management system: +observers, observations, and stratifications. An **observer** registers desired +measure-specific results - referred to as **observations** - that may or may not +be grouped into **stratifications**. + +.. note:: + A ``vivarium`` simulation will *not* record results by default. The user must + define observers that register observations in order to record results! + +Observers ++++++++++ + +The :class:`Observer ` object is a +``vivarium`` :class:`Component ` and abstract base +class whose primary purpose is to register observations to the results system. +Ideally, each concrete observer class should register a single observation (though +this is not enforced). + +Observations +++++++++++++ + +When discussing the results system, an **observation** is used somewhat interchangeably +with the term "results". More specifically, an observation is a set of measure-specific +results that are collected throughout the simulation. 
+ +Implementation-wise, an observation is a data structure that holds the values +and callables required to collect the results of a specific measure during the simulation. + +At the highest level, an observation can be considered either *stratified* or +*unstratified*. A +:class:`StratifiedObservation ` +is one whose results are grouped into and aggregated by categories referred to as +**stratifications**. An +:class:`UnstratifiedObservation ` +is one whose results are not grouped into categories. + +A couple of other more specific and commonly used observations are provided as well: + +- :class:`AddingObservation `: + a specific type of + :class:`StratifiedObservation ` + that gathers new results and adds/sums them to any existing results. +- :class:`ConcatenatingObservation `: + a specific type of + :class:`UnstratifiedObservation ` + that gathers new results and concatenates them to any existing results. + +Ideally, all concrete classes should inherit from the +:class:`BaseObservation ` +abstract base class, which contains the common attributes between observation types: + +.. list-table:: **Common Observation Attributes** + :widths: 15 45 + :header-rows: 1 + + * - Attribute + - Description + * - | :attr:`name ` + - | Name of the observation. It will also be the name of the output results file + | for this particular observation. + * - | :attr:`pop_filter ` + - | A Pandas query filter string to filter the population down to the simulants + | who should be considered for the observation. + * - | :attr:`when ` + - | Name of the lifecycle phase the observation should happen. Valid values are: + | "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". + * - | :attr:`results_initializer ` + - | Method or function that initializes the raw observation results + | prior to starting the simulation. This could return, for example, an empty + | DataFrame or one with a complete set of stratifications as the index and + | all values set to 0.0. 
+ * - | :attr:`results_gatherer ` + - | Method or function that gathers the new observation results. + * - | :attr:`results_updater ` + - | Method or function that updates existing raw observation results with newly + | gathered results. + * - | :attr:`results_formatter ` + - | Method or function that formats the raw observation results. + * - | :attr:`stratifications ` + - | Optional tuple of column names for the observation to stratify by. + * - | :attr:`to_observe ` + - | Method or function that determines whether to perform an observation on this Event. + +The **BaseObservation** also contains the +:meth:`observe ` +method which is called at each :ref:`event ` and :ref:`time step ` +to determine whether or not the observation should be recorded, and if so, gathers +the results and stores them in the results system. + +.. note:: + All four observation types discussed above inherit from the **BaseObservation** + abstract base class. What differentiates them are the assigned attributes + (e.g. defining the **results_updater** to be an adding method for the + **AddingObservation**) or adding other attributes as necessary (e.g. + adding a **stratifications**, **aggregator_sources**, and **aggregator** for + the **StratifiedObservation**). + +Stratifications ++++++++++++++++ + +A **stratification** is a way to group and aggregate results into categories. For +example, if you have an observation that records a certain measure but you want to +stratify the results by age groups, you can register a stratification containing a +mapper function that maps each simulant's age to an age group (e.g. 23.1 -> "20_to_25"). + +The :class:`Stratification ` +class is a data structure that holds the values and callables required to stratify the +results of an observation: + +.. list-table:: **Stratification Attributes** + :widths: 15 45 + :header-rows: 1 + + * - Attribute + - Description + * - | :attr:`name ` + - | Name of the stratification. 
+ * - | :attr:`sources ` + - | A list of the columns and values needed as input for the `mapper`. + * - | :attr:`categories ` + - | Exhaustive list of all possible stratification values. + * - | :attr:`excluded_categories ` + - | List of possible stratification values to exclude from results processing. + | If None (the default), will use exclusions as defined in the configuration. + * - | :attr:`mapper ` + - | A callable that maps the columns and value pipelines specified by the + | `requires_columns` and `requires_values` arguments to the stratification + | categories. It can either map the entire population or an individual + | simulant. A simulation will fail if the `mapper` ever produces an invalid + | value. + * - | :attr:`is_vectorized ` + - | True if the `mapper` function will map the entire population, and False + | if it will only map a single simulant. + +Each **Stratification** also contains the +:meth:`stratify ` +method which is called at each :ref:`event ` and :ref:`time step ` +to use the **mapper** to map values in the **sources** columns to **categories** +(excluding any categories specified in **excluded_categories**). + +.. note:: + There are two types of supported stratifications: *unbinned* and *binned*; + both types are backed by an instance of **Stratification**. + +How to Use the Results Management System +---------------------------------------- + +The intended workflow for using the results system is to create an **observer** +and register **observations** with it. **Stratifications** can also be registered +if desired. + +Creating an Observer and Registering Observations +++++++++++++++++++++++++++++++++++++++++++++++++++ + +All **observers** should be concrete instances of the +:class:`Observer ` +abstract base class which guarantees that it is a proper ``vivarium`` +:class:`Component `. 
And while the user is free to +add whatever business logic is necessary, the primary goal of the component lies +in the :meth:`register_observations ` +method. This is a required method (indeed, it is an abstract method of the +**Observer** class) and is where the user should register all observations +(ideally one per observer). + +Observation registration methods exist on the simulation's +:class:`ResultsInterface ` and +can be accessed through the :ref:`builder `: + +- :meth:`builder.results.register_stratified_observation ` +- :meth:`builder.results.register_unstratified_observation ` +- :meth:`builder.results.register_adding_observation ` +- :meth:`builder.results.register_concatenating_observation ` + +For example, here is an observer that records the number of deaths in a simulation +(defined completely through the "pop_filter" argument). That is, it records the +number of people that have died during the current time step and adds that number +to the existing number of people who have died from previous time steps. + +.. testcode:: + + from typing import Any, Optional + + import pandas as pd + + from vivarium.framework.engine import Builder + from vivarium.framework.results import Observer + + class DeathObserver(Observer): + + @property + def configuration_defaults(self) -> dict[str, Any]: + return { + "mortality": { + "life_expectancy": 80, + } + } + + @property + def columns_required(self) -> Optional[list[str]]: + return ["age", "alive"] + + def register_observations(self, builder: Builder) -> None: + builder.results.register_adding_observation( + name="total_population_dead", + requires_columns=["alive"], + pop_filter='alive == "dead"', + ) + +And here is an example of how you might create an observer that records new +births (defined completely through the "pop_filter" argument), concatenates them +to existing ones, and formats the data to only include specified state table columns +as well as adds a new one ("birth_date"). + +.. 
testcode:: + from datetime import datetime + + import pandas as pd + + from vivarium.framework.engine import Builder + from vivarium.framework.results import Observer + + class BirthObserver(Observer): + + COLUMNS = ["sex", "birth_weight", "gestational_age", "pregnancy_outcome"] + + def register_observations(self, builder: Builder) -> None: + builder.results.register_concatenating_observation( + name="births", + pop_filter=( + "(" + f"pregnancy_outcome == 'live_birth' " + f"or pregnancy_outcome == 'stillbirth'" + ") " + f"and previous_pregnancy == 'pregnant' " + f"and pregnancy == 'parturition'" + ), + requires_columns=self.COLUMNS, + results_formatter=self.format, + ) + + def format(self, measure: str, results: pd.DataFrame) -> pd.DataFrame: + new_births = results[self.COLUMNS] + new_births["birth_date"] = datetime(2024, 12, 30).strftime("%Y-%m-%d T%H:%M.%f") + return new_births + +As both of these examples are proper ``vivarium`` +:class:`Components `, they are added to the simulation +via the :ref:`model specification ` like any other component: + +.. code-block:: yaml + + components: + : + : # as many subdirectories as needed to fully define the path + - DeathObserver() + - BirthObserver() + +Stratifying Observations +++++++++++++++++++++++++ + +If you want to stratify the results of an **observation** (that is, group and +aggregate by designated categories), you can register a +:class:`Stratification ` +with the results system. Stratification registration methods exist on the simulation's +:class:`ResultsInterface ` and +can be accessed through the :ref:`builder `: + +- :meth:`builder.results.register_stratification ` +- :meth:`builder.results.register_binned_stratification ` + +Here is an example of how you might register a "pregnancy_outcome" stratification +as part of the **BirthObserver's** **register_observations** method: + +.. 
testcode:: + + from vivarium.framework.engine import Builder + from vivarium.framework.results import Observer + + class BirthObserver(Observer): + + ... + + def register_observations(self, builder: Builder) -> None: + builder.results.register_stratification( + "pregnancy_outcome", + ["live_birth", "stillbirth", "pregnancy", "parturition"], + requires_columns=["pregnancy_outcome"], + ) + ... # register observations + + ... + +.. note:: + It is somewhat common to encapsulate all stratification registrations in a single + class, though this is not enforced (as the example above demonstrates). + +Just because you've *registered* a stratification doesn't mean that the results will +actually *use* it. In order to use the stratification, you must add it to the +:ref:`model specification ` configuration block +using the "stratification" key. You can provide "default" stratifications which +will be used by all observations as well as observation-specific "include" and +"exclude" keys to further modify each observation's stratifications. + +For example, to use "age_group" and "sex" as default stratifications for *all* +observations and then customize the "births" observations to also include +"pregnancy_outcome" but not "age_group": + +.. code-block:: yaml + + configuration: + stratification: + default: + - 'age_group' + - 'sex' + births: + include: ['pregnancy_outcome'] + exclude: ['age_group'] + +.. note:: + All stratifications must be included as a list, even if there is only one. + +Excluding Categories from Results ++++++++++++++++++++++++++++++++++ + +It is also possible to exclude specific *categories* from results processing. For +example, perhaps we do not care about results for simulants in certain "age_groups" +or who have a certain "cause_of_death" or "disability". Excluding categories +is done by providing an "excluded_categories" key along with a *per observation* +list of categories to exclude within the :ref:`model specification's ` +stratification block. 
+ +For example, to exclude "stillbirth" as a pregnancy outcome during results processing: + +.. code-block:: yaml + + configuration: + stratification: + default: + - 'age_group' + - 'sex' + births: + include: ['pregnancy_outcome'] + exclude: ['age_group'] + excluded_categories: + pregnancy_outcome: ['stillbirth'] diff --git a/docs/source/concepts/time.rst b/docs/source/concepts/time.rst index fdf8e127a..077e47440 100644 --- a/docs/source/concepts/time.rst +++ b/docs/source/concepts/time.rst @@ -9,14 +9,6 @@ Thinking about Time in the Simulation :local: :backlinks: none -Outline -------- - - The Simulation Clock - - Start, stop, and step size. - - Fundamental assumptions about discrete time simulation. - - clock time vs. event time - - Individual Clocks - The Simulation Clock -------------------- The :class:`SimulationClock ` plugin manages the progression of time throughout the simulation. diff --git a/src/vivarium/framework/results/context.py b/src/vivarium/framework/results/context.py index f7d23d0a5..b4bdeb034 100644 --- a/src/vivarium/framework/results/context.py +++ b/src/vivarium/framework/results/context.py @@ -204,7 +204,7 @@ def register_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". **kwargs Additional keyword arguments to be passed to the observation's constructor. 
diff --git a/src/vivarium/framework/results/interface.py b/src/vivarium/framework/results/interface.py index 25eb9264d..eb669bef6 100644 --- a/src/vivarium/framework/results/interface.py +++ b/src/vivarium/framework/results/interface.py @@ -194,7 +194,7 @@ def register_stratified_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". requires_columns List of the state table columns that are required by either the `pop_filter` or the `aggregator`. @@ -269,7 +269,7 @@ def register_unstratified_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". requires_columns List of the state table columns that are required by either the `pop_filter` or the `aggregator`. @@ -337,7 +337,7 @@ def register_adding_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". requires_columns List of the state table columns that are required by either the `pop_filter` or the `aggregator`. 
@@ -399,7 +399,7 @@ def register_concatenating_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". requires_columns List of the state table columns that are required by either the `pop_filter` or the `aggregator`. diff --git a/src/vivarium/framework/results/manager.py b/src/vivarium/framework/results/manager.py index 25807899c..f67a547d4 100644 --- a/src/vivarium/framework/results/manager.py +++ b/src/vivarium/framework/results/manager.py @@ -322,7 +322,7 @@ def register_observation( A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". requires_columns List of the state table columns that are required by either the `pop_filter` or the `aggregator`. diff --git a/src/vivarium/framework/results/observation.py b/src/vivarium/framework/results/observation.py index d31062167..a9092fd1d 100644 --- a/src/vivarium/framework/results/observation.py +++ b/src/vivarium/framework/results/observation.py @@ -47,7 +47,7 @@ class BaseObservation(ABC): """A Pandas query filter string to filter the population down to the simulants who should be considered for the observation.""" when: str - """String name of the lifecycle phase the observation should happen. Valid values are: + """Name of the lifecycle phase the observation should happen. 
Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics".""" results_initializer: Callable[[Iterable[str], Iterable[Stratification]], pd.DataFrame] """Method or function that initializes the raw observation results @@ -100,7 +100,7 @@ class UnstratifiedObservation(BaseObservation): A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". results_gatherer Method or function that gathers the new observation results. @@ -159,7 +159,7 @@ class StratifiedObservation(BaseObservation): A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". results_updater Method or function that updates existing raw observation results with newly gathered results. @@ -316,7 +316,7 @@ class AddingObservation(StratifiedObservation): A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". results_formatter Method or function that formats the raw observation results. 
@@ -391,7 +391,7 @@ class ConcatenatingObservation(UnstratifiedObservation): A Pandas query filter string to filter the population down to the simulants who should be considered for the observation. when - String name of the lifecycle phase the observation should happen. Valid values are: + Name of the lifecycle phase during which the observation should happen. Valid values are: "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". included_columns Columns to include in the observation diff --git a/src/vivarium/framework/results/stratification.py b/src/vivarium/framework/results/stratification.py index 765a047ac..455d5b070 100644 --- a/src/vivarium/framework/results/stratification.py +++ b/src/vivarium/framework/results/stratification.py @@ -84,9 +84,9 @@ def __post_init__(self) -> None: raise ValueError("The sources argument must be non-empty.") def stratify(self, population: pd.DataFrame) -> pd.Series: - """Apply the mapper to the population `sources` columns to create a new - Series to be added to the population. Any excluded categories - (which have already been removed from self.categories) will be converted + """Apply the `mapper` to the population `sources` columns to create a new + Series to be added to the population. Any `excluded_categories` + (which have already been removed from `categories`) will be converted to NaNs in the new column and dropped later at the observation level.
Parameters From cbed10b0ca9b24114890a456bf62e23b6aafcd78 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 21 Aug 2024 08:39:46 -0700 Subject: [PATCH 4/8] minor updates from review --- .../concepts/model_specification/index.rst | 23 +- docs/source/concepts/results.rst | 273 +++++++++--------- 2 files changed, 151 insertions(+), 145 deletions(-) diff --git a/docs/source/concepts/model_specification/index.rst b/docs/source/concepts/model_specification/index.rst index 16efb98a2..83bf439a2 100644 --- a/docs/source/concepts/model_specification/index.rst +++ b/docs/source/concepts/model_specification/index.rst @@ -4,6 +4,16 @@ The Model Specification ======================= +.. contents:: + :depth: 2 + :local: + :backlinks: none + +.. toctree:: + :hidden: + + yaml_basics + A :term:`model specification ` is a complete representation of a :mod:`vivarium` simulation formatted as a yaml file. @@ -19,15 +29,11 @@ Each of these blocks is delineated by a top-level key in the yaml file: You can find a short intro to yaml basics :ref:`here `. -.. contents:: - :depth: 2 - :local: - :backlinks: none - The Plugins Block ----------------- -TBD +.. todo:: + describe plugins The Components Block -------------------- @@ -90,11 +96,6 @@ call on either of the above yaml components block examples would be a list containing three instantiated objects: a population object, a mortality object, and a diarrhea disease model. -.. toctree:: - :hidden: - - yaml_basics - The Configuration Block ----------------------- diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index 1e39f09fa..ab1897a4b 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -25,140 +25,10 @@ be grouped into **stratifications**. A ``vivarium`` simulation will *not* record results by default. The user must define observers that register observations in order to record results! 
-Observers -+++++++++ - -The :class:`Observer ` object is a -``vivarium`` :class:`Component ` and abstract base -class whose primary purpose is to register observations to the results system. -Ideally, each concrete observer class should register a single observation (though -this is not enforced). - -Observations -++++++++++++ - -When discussing the results system, an **observation** is used somewhat interchangeably -with the term "results". More specifically, an observation is a set of measure-specific -results that are collected throughout the simulation. - -Implementation-wise, an observation is a data structure that holds the values -and callables required to collect the results of a specific measure during the simulation. - -At the highest level, an observation can be considered either *stratified* or -*unstratified*. A -:class:`StratifiedObservation ` -is one whose results are grouped into and aggregated by categories referred to as -**stratifications**. An -:class:`UnstratifiedObservation ` -is one whose results are not grouped into categories. - -A couple other more specific and commonly used observations are provided as well: - -- :class:`AddingObservation `: - a specific type of - :class:`StratifiedObservation ` - that gathers new results and adds/sums them to any existing results. -- :class:`ConcatenatingObservation `: - a specific type of - :class:`UnstratifiedObservation ` - that gathers new results and concatenates them to any existing results. - -Ideally, all concrete classes should inherit from the -:class:`BaseObservation ` -abstract base class, which contains the common attributes between observation types: - -.. list-table:: **Common Observation Attributes** - :widths: 15 45 - :header-rows: 1 - - * - Attribute - - Description - * - | :attr:`name ` - - | Name of the observation. It will also be the name of the output results file - | for this particular observation. 
- * - | :attr:`pop_filter ` - - | A Pandas query filter string to filter the population down to the simulants - | who should be considered for the observation. - * - | :attr:`when ` - - | Name of the lifecycle phase the observation should happen. Valid values are: - | "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". - * - | :attr:`results_initializer ` - - | Method or function that initializes the raw observation results - | prior to starting the simulation. This could return, for example, an empty - | DataFrame or one with a complete set of stratifications as the index and - | all values set to 0.0. - * - | :attr:`results_gatherer ` - - | Method or function that gathers the new observation results. - * - | :attr:`results_updater ` - - | Method or function that updates existing raw observation results with newly - | gathered results. - * - | :attr:`results_formatter ` - - | Method or function that formats the raw observation results. - * - | :attr:`stratifications ` - - | Optional tuple of column names for the observation to stratify by. - * - | :attr:`to_observe ` - - | Method or function that determines whether to perform an observation on this Event. - -The **BaseObservation** also contains the -:meth:`observe ` -method which is called at each :ref:`event ` and :ref:`time step ` -to determine whether or not the observation should be recorded, and if so, gathers -the results and stores them in the results system. - .. note:: - All four observation types discussed above inherit from the **BaseObservation** - abstract base class. What differentiates them are the assigned attributes - (e.g. defining the **results_updater** to be an adding method for the - **AddingObservation**) or adding other attributes as necessary (e.g. - adding a **stratifications**, **aggregator_sources**, and **aggregator** for - the **StratifiedObservation**). 
- -Stratifications -+++++++++++++++ - -A **stratification** is a way to group and aggregate results into categories. For -example, if you have an observation that records a certain measure but you want to -stratify the results by age groups, you can register a stratification containing a -mapper function that maps each simulant's age to an age group (e.g. 23.1 -> "20_to_25"). - -The :class:`Stratification ` -class is a data structure that holds the values and callables required to stratify the -results of an observation: - -.. list-table:: **Stratification Attributes** - :widths: 15 45 - :header-rows: 1 - - * - Attribute - - Description - * - | :attr:`name ` - - | Name of the stratification. - * - | :attr:`sources ` - - | A list of the columns and values needed as input for the `mapper`. - * - | :attr:`categories ` - - | Exhaustive list of all possible stratification values. - * - | :attr:`excluded_categories ` - - | List of possible stratification values to exclude from results processing. - | If None (the default), will use exclusions as defined in the configuration. - * - | :attr:`mapper ` - - | A callable that maps the columns and value pipelines specified by the - | `requires_columns` and `requires_values` arguments to the stratification - | categories. It can either map the entire population or an individual - | simulant. A simulation will fail if the `mapper` ever produces an invalid - | value. - * - | :attr:`is_vectorized ` - - | True if the `mapper` function will map the entire population, and False - | if it will only map a single simulant. - -Each **Stratification** also contains the -:meth:`stratify ` -method which is called at each :ref:`event ` and :ref:`time step ` -to use the **mapper** to map values in the **sources** columns to **categories** -(excluding any categories specified in **excluded_categories**). - -.. 
note:: - There are two types of supported stratifications: *unbinned* and *binned*; - both types are backed by an instance of **Stratification**. + Users should not interact with observations and stratifications directly - + they should only be created by the methods provided by the + :class:`ResultsInterface `. How to Use the Results Management System ---------------------------------------- @@ -173,7 +43,7 @@ Creating an Observer and Registering Observations All **observers** should be concrete instances of the :class:`Observer ` abstract base class which guarantees that it is a proper ``vivarium`` -:class:`Component `. And while the user is free to +:class:`Component `. While the user is free to add whatever business logic is necessary, the primary goal of the component lies in the :meth:`register_observations ` method. This is a required method (indeed, it is an abstract method of the @@ -361,3 +231,138 @@ For example, to exclude "stillbirth" as a pregnancy outcome during results proce exclude: ['age_group'] excluded_categories: pregnancy_outcome: ['stillbirth'] + +Observers +--------- + +The :class:`Observer ` object is a +``vivarium`` :class:`Component ` and abstract base +class whose primary purpose is to register observations to the results system. +Ideally, each concrete observer class should register a single observation (though +this is not enforced). + +Observations +------------ + +When discussing the results system, an **observation** is used somewhat interchangeably +with the term "results". More specifically, an observation is a set of measure-specific +results that are collected throughout the simulation. + +Implementation-wise, an observation is a data structure that holds the values +and callables required to collect the results of a specific measure during the simulation. + +At the highest level, an observation can be considered either *stratified* or +*unstratified*. 
A +:class:`StratifiedObservation ` +is one whose results are grouped into and aggregated by categories referred to as +**stratifications**. An +:class:`UnstratifiedObservation ` +is one whose results are not grouped into categories. + +A couple of other, more specific and commonly used observations are provided as well: + +- :class:`AddingObservation `: + a specific type of + :class:`StratifiedObservation ` + that gathers new results and adds/sums them to any existing results. +- :class:`ConcatenatingObservation `: + a specific type of + :class:`UnstratifiedObservation ` + that gathers new results and concatenates them to any existing results. + +Ideally, all concrete classes should inherit from the +:class:`BaseObservation ` +abstract base class, which contains the common attributes between observation types: + +.. list-table:: **Common Observation Attributes** + :widths: 15 45 + :header-rows: 1 + + * - Attribute + - Description + * - | :attr:`name ` + - | Name of the observation. It will also be the name of the output results file + | for this particular observation. + * - | :attr:`pop_filter ` + - | A Pandas query filter string to filter the population down to the simulants + | who should be considered for the observation. + * - | :attr:`when ` + - | Name of the lifecycle phase during which the observation should happen. Valid values are: + | "time_step__prepare", "time_step", "time_step__cleanup", or "collect_metrics". + * - | :attr:`results_initializer ` + - | Method or function that initializes the raw observation results + | prior to starting the simulation. This could return, for example, an empty + | DataFrame or one with a complete set of stratifications as the index and + | all values set to 0.0. + * - | :attr:`results_gatherer ` + - | Method or function that gathers the new observation results. + * - | :attr:`results_updater ` + - | Method or function that updates existing raw observation results with newly + | gathered results.
+ * - | :attr:`results_formatter ` + - | Method or function that formats the raw observation results. + * - | :attr:`stratifications ` + - | Optional tuple of column names for the observation to stratify by. + * - | :attr:`to_observe ` + - | Method or function that determines whether to perform an observation on this Event. + +The **BaseObservation** also contains the +:meth:`observe ` +method which is called at each :ref:`event ` and :ref:`time step ` +to determine whether or not the observation should be recorded, and if so, gathers +the results and stores them in the results system. + +.. note:: + All four observation types discussed above inherit from the **BaseObservation** + abstract base class. What differentiates them are the assigned attributes + (e.g. defining the **results_updater** to be an adding method for the + **AddingObservation**) or adding other attributes as necessary (e.g. + adding **stratifications**, **aggregator_sources**, and **aggregator** for + the **StratifiedObservation**). + +Stratifications +--------------- + +A **stratification** is a way to group and aggregate results into categories. For +example, if you have an observation that records a certain measure but you want to +stratify the results by age groups, you can register a stratification containing a +mapper function that maps each simulant's age to an age group (e.g. 23.1 -> "20_to_25"). + +The :class:`Stratification ` +class is a data structure that holds the values and callables required to stratify the +results of an observation: + +.. list-table:: **Stratification Attributes** + :widths: 15 45 + :header-rows: 1 + + * - Attribute + - Description + * - | :attr:`name ` + - | Name of the stratification. + * - | :attr:`sources ` + - | A list of the columns and values needed as input for the `mapper`. + * - | :attr:`categories ` + - | Exhaustive list of all possible stratification values.
+ * - | :attr:`excluded_categories ` + - | List of possible stratification values to exclude from results processing. + | If None (the default), will use exclusions as defined in the configuration. + * - | :attr:`mapper ` + - | A callable that maps the columns and value pipelines specified by the + | `requires_columns` and `requires_values` arguments to the stratification + | categories. It can either map the entire population or an individual + | simulant. A simulation will fail if the `mapper` ever produces an invalid + | value. + * - | :attr:`is_vectorized ` + - | True if the `mapper` function will map the entire population, and False + | if it will only map a single simulant. + +Each **Stratification** also contains the +:meth:`stratify ` +method which is called at each :ref:`event ` and :ref:`time step ` +to use the **mapper** to map values in the **sources** columns to **categories** +(excluding any categories specified in **excluded_categories**). + +.. note:: + There are two types of supported stratifications: *unbinned* and *binned*; + both types are backed by an instance of **Stratification**. From 54faa75ae3d5e276d7b7b453a36d02d074afb8c0 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 21 Aug 2024 08:44:38 -0700 Subject: [PATCH 5/8] update exclusions example --- docs/source/concepts/results.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index ab1897a4b..261796a28 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -190,8 +190,8 @@ will be used by all observations as well as observation-specific "include" and "exclude" keys to further modify each observation's stratifications. 
For example, to use "age_group" and "sex" as default stratifications for *all* -observations and then customize the "births" observations to also include -"pregnancy_outcome" but not "age_group": +observations and then customize "deaths" observations to also include +"location" but not "age_group": .. code-block:: yaml @@ -201,7 +201,7 @@ observations and then customize the "births" observations to also include - 'age_group' - 'sex' births: - include: ['pregnancy_outcome'] + include: ['location'] exclude: ['age_group'] .. note:: From a237a6a72b71d3793bc59e2376d4463d71d73dee Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 21 Aug 2024 08:49:12 -0700 Subject: [PATCH 6/8] typo --- docs/source/concepts/results.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index 261796a28..47ff8e50f 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -200,7 +200,7 @@ observations and then customize "deaths" observations to also include default: - 'age_group' - 'sex' - births: + deaths: include: ['location'] exclude: ['age_group'] From bd64352fe593f7baa24a4123c3cd13cdc3156d4b Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 21 Aug 2024 13:37:24 -0700 Subject: [PATCH 7/8] change stratification registration example --- docs/source/concepts/results.rst | 56 ++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index 47ff8e50f..b4fae1131 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -156,31 +156,61 @@ can be accessed through the :ref:`builder `: - :meth:`builder.results.register_stratification ` - :meth:`builder.results.register_binned_stratification ` -Here is an example of how you might register a "pregnancy_outcome" stratification -as part of the **BirthObserver's** **register_observations** 
method: +Here is an example of how you might register a "current_year" and "sex" as stratifications: .. testcode:: + import pandas as pd + from vivarium import Component from vivarium.framework.engine import Builder - from vivarium.framework.results import Observer - class BirthObserver(Observer): + class ResultsStratifier(Component): + """Register stratifications for the results system""" - ... + def setup(self, builder: Builder) -> None: + self.start_year = builder.configuration.time.start.year + self.end_year = builder.configuration.time.end.year + self.register_stratifications(builder) - def register_observations(self, builder: Builder) -> None: + def register_stratifications(self, builder: Builder) -> None: builder.results.register_stratification( - "pregnancy_outcome", - ["live_birth", "stillbirth", "pregnancy", "parturition"], - requires_columns=["pregnancy_outcome"], + "current_year", + [str(year) for year in range(self.start_year, self.end_year + 1)], + mapper=self.map_year, + is_vectorized=True, + requires_columns=["current_time"], ) - ... # register observations + builder.results.register_stratification( + "sex", ["Female", "Male"], requires_columns=["sex"] + ) + + ########### + # Mappers # + ########### + + @staticmethod + def map_year(pop: pd.DataFrame) -> pd.Series[str]: + """Map datetime with year + + Parameters + ---------- + pop + A pd.DataFrame with one column, a datetime to be mapped to year - ... + Returns + ------ + pandas.Series + A pd.Series with years corresponding to the pop passed into the function + """ + return pop.squeeze(axis=1).dt.year.apply(str) .. note:: - It is somewhat common to encapsulate all stratification registrations in a single - class, though this is not enforced (as the example above demonstrates). + Good encapsulation suggests that all stratification registrations occur in a single + class (as in the **ResultsStratifier** class in the above example). 
This is not + enforced, however, and it is also somewhat common to register a stratification + that will only be used by a single observer within that observer's + :meth:`register_observations ` + method. Just because you've *registered* a stratification doesn't mean that the results will actually *use* it. In order to use the stratification, you must add it to the From 9cfaf5984169ce55a304599ccadab5fcf03acbf4 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 21 Aug 2024 13:42:49 -0700 Subject: [PATCH 8/8] fix breaking doctest --- docs/source/concepts/results.rst | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/source/concepts/results.rst b/docs/source/concepts/results.rst index b4fae1131..3581aed59 100644 --- a/docs/source/concepts/results.rst +++ b/docs/source/concepts/results.rst @@ -100,6 +100,7 @@ to existing ones, and formats the data to only include specified state table col as well as adds a new one ("birth_date"). .. testcode:: + from datetime import datetime import pandas as pd @@ -159,6 +160,7 @@ can be accessed through the :ref:`builder `: Here is an example of how you might register a "current_year" and "sex" as stratifications: .. 
testcode:: + import pandas as pd from vivarium import Component @@ -189,20 +191,20 @@ Here is an example of how you might register a "current_year" and "sex" as strat ########### @staticmethod - def map_year(pop: pd.DataFrame) -> pd.Series[str]: - """Map datetime with year - - Parameters - ---------- - pop - A pd.DataFrame with one column, a datetime to be mapped to year - - Returns - ------ - pandas.Series - A pd.Series with years corresponding to the pop passed into the function - """ - return pop.squeeze(axis=1).dt.year.apply(str) + def map_year(pop: pd.DataFrame) -> pd.Series: + """Map datetime to year + + Parameters + ---------- + pop + A pd.DataFrame with one column, a datetime to be mapped to year + + Returns + ------- + pandas.Series + A pd.Series with years corresponding to the pop passed into the function + """ + return pop.squeeze(axis=1).dt.year.apply(str) .. note:: Good encapsulation suggests that all stratification registrations occur in a single