From af85e8bb808f7fccb19c98e2a6be6935f0345b61 Mon Sep 17 00:00:00 2001
From: Steve Bachmeier <23350991+stevebachmeier@users.noreply.github.com>
Date: Tue, 16 Jul 2024 15:44:22 -0600
Subject: [PATCH] raise when stratification has duplicate categories (#450)

---
 src/vivarium/framework/results/context.py |  7 ++++++
 tests/framework/results/test_context.py   | 27 ++++++++++++++++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/vivarium/framework/results/context.py b/src/vivarium/framework/results/context.py
index 2fff1980f..1d3b3977f 100644
--- a/src/vivarium/framework/results/context.py
+++ b/src/vivarium/framework/results/context.py
@@ -92,6 +92,13 @@ def add_stratification(
             raise ValueError(
                 f"Stratification name '{name}' is already used: {str(already_used[0])}."
             )
+        unique_categories = set(categories)
+        if len(categories) != len(unique_categories):
+            for category in unique_categories:
+                categories.remove(category)
+            raise ValueError(
+                f"Found duplicate categories in stratification '{name}': {categories}."
+            )
         stratification = Stratification(name, sources, categories, mapper, is_vectorized)
         self.stratifications.append(stratification)
 
diff --git a/tests/framework/results/test_context.py b/tests/framework/results/test_context.py
index dbbcae2cd..376c05f70 100644
--- a/tests/framework/results/test_context.py
+++ b/tests/framework/results/test_context.py
@@ -1,5 +1,6 @@
 import itertools
 import math
+import re
 from datetime import timedelta
 
 import pandas as pd
@@ -24,6 +25,11 @@
 )
 
 
+def _aggregate_state_person_time(x: pd.DataFrame) -> float:
+    """Helper aggregator function for observation testing"""
+    return len(x) * (28 / 365.25)
+
+
 @pytest.mark.parametrize(
     "name, sources, categories, mapper, is_vectorized",
     [
@@ -88,9 +94,24 @@ def test_add_stratifcation_duplicate_name_raises():
         ctx.add_stratification(NAME, [], [], None, False)
 
 
-def _aggregate_state_person_time(x: pd.DataFrame) -> float:
-    """Helper aggregator function for observation testing"""
-    return len(x) * (28 / 365.25)
+@pytest.mark.parametrize(
+    "duplicates",
+    [
+        ["slytherin"],
+        ["gryffindor", "slytherin"],
+    ],
+)
+def test_add_stratification_duplicate_category_raises(duplicates):
+    ctx = ResultsContext()
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            f"Found duplicate categories in stratification '{NAME}': {duplicates}"
+        ),
+    ):
+        ctx.add_stratification(
+            NAME, SOURCES, CATEGORIES + duplicates, sorting_hat_vector, True
+        )
 
 
 @pytest.mark.parametrize(