From 09a5a863757bc6998f32834783abb924f69076cd Mon Sep 17 00:00:00 2001 From: sgbaird Date: Thu, 28 Jul 2022 20:16:02 -0600 Subject: [PATCH 1/6] error checking for duplicity From 392a2f8316a099e5e7bc4f110852203fead539ee Mon Sep 17 00:00:00 2001 From: sgbaird Date: Thu, 28 Jul 2022 20:16:29 -0600 Subject: [PATCH 2/6] Update core.py --- src/matbench_genmetrics/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/matbench_genmetrics/core.py b/src/matbench_genmetrics/core.py index dae76a4..7963873 100644 --- a/src/matbench_genmetrics/core.py +++ b/src/matbench_genmetrics/core.py @@ -123,6 +123,7 @@ def match_rate(self): def duplicity_counts(self): if self.num_test != self.num_gen: raise ValueError("Test and gen sets should be identical.") + # TODO: assert that test and gen sets are identical return np.clip(self.match_counts - 1, 0, None) @property From b654ff9d483c67be3ecfa6662cb5ee883273948e Mon Sep 17 00:00:00 2001 From: sgbaird Date: Thu, 28 Jul 2022 20:23:15 -0600 Subject: [PATCH 3/6] divide by 2 --- src/matbench_genmetrics/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matbench_genmetrics/core.py b/src/matbench_genmetrics/core.py index 7963873..44c4a61 100644 --- a/src/matbench_genmetrics/core.py +++ b/src/matbench_genmetrics/core.py @@ -124,7 +124,7 @@ def duplicity_counts(self): if self.num_test != self.num_gen: raise ValueError("Test and gen sets should be identical.") # TODO: assert that test and gen sets are identical - return np.clip(self.match_counts - 1, 0, None) + return np.clip(self.match_counts - 1, 0, None) / 2 @property def duplicity_count(self): From 3fb133e69e3dfbe5f16366448a875b1a30284090 Mon Sep 17 00:00:00 2001 From: sgbaird Date: Thu, 28 Jul 2022 20:23:24 -0600 Subject: [PATCH 4/6] Update 1.0-matbench-genmetrics-basic.ipynb --- notebooks/1.0-matbench-genmetrics-basic.ipynb | 294 +++++++++++++----- 1 file changed, 210 insertions(+), 84 deletions(-) diff --git a/notebooks/1.0-matbench-genmetrics-basic.ipynb b/notebooks/1.0-matbench-genmetrics-basic.ipynb index 6ec209b..f389cca 100644 --- a/notebooks/1.0-matbench-genmetrics-basic.ipynb +++ b/notebooks/1.0-matbench-genmetrics-basic.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -64,234 +64,316 @@ "output_type": "stream", "text": [ "Reading file c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 0it [00:00, ?it/s]0, ?it/s]\n", - "Decoding objects from c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 100%|##########| 11/11 [00:00<00:00, 697.03it/s]\n", + "Decoding objects from c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 100%|##########| 11/11 [00:00<00:00, 846.42it/s]\n", "\n", "\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 2500.33it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 2498.39it/s]\n", "100%|██████████| 1/1 [00:00<00:00, 142.84it/s]\n", "\n", "\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 3331.19it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 2502.72it/s]\n", "\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 2502.42it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 1998.91it/s]\n", "\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 2499.44it/s]\n", - "100%|██████████| 3/3 [00:00<00:00, 121.21it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 2500.03it/s]\n", + "100%|██████████| 3/3 [00:00<00:00, 115.39it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 67.11it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 65.79it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 76.63it/s]\n", + "\u001b[A\u001b[A\n", + "\n", + "100%|██████████| 10/10 [00:00<00:00, 30.67it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 71.66it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 59.17it/s]\n", "\n", "\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 114.95it/s]\n", + "\u001b[A\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", "\n", + "100%|██████████| 10/10 [00:00<00:00, 26.45it/s]\n", + "\n", + "\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 88.93it/s]\n", + "\u001b[A\u001b[A\n", + "\n", + "100%|██████████| 10/10 [00:00<00:00, 23.15it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 82.54it/s]\n", + "\u001b[A\u001b[A\n", + "\n", + "\u001b[A\u001b[A\n", + "\n", + "100%|██████████| 10/10 [00:00<00:00, 23.96it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 87.42it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 79.42it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 61.36it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 66.39it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 64.94it/s]\n", + "100%|██████████| 10/10 [00:00<00:00, 41.51it/s]\n", "\n", "\u001b[A\n", "\n", "\u001b[A\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 99.96it/s]\n", + "\u001b[A\u001b[A\n", + "\n", + "100%|██████████| 10/10 [00:00<00:00, 30.60it/s]\n", "\n", - "100%|██████████| 10/10 [00:01<00:00, 7.46it/s]\n", + "100%|██████████| 10/10 [00:02<00:00, 3.61it/s]\n", "Reading file c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 0it [00:00, ?it/s]0, ?it/s]\n", - "Decoding objects from c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 100%|##########| 11/11 [00:00<00:00, 704.10it/s]\n", + "Decoding objects from c:\\Users\\sterg\\Miniconda3\\envs\\matbench-genmetrics\\lib\\site-packages\\mp_time_split\\utils\\mp_dummy_time_summary.json.gz: 100%|##########| 11/11 [00:00<00:00, 999.99it/s]\n", "\n", "\u001b[A\n", "\n", - "100%|██████████| 10/10 [00:00<00:00, 3331.99it/s]\n", - "100%|██████████| 1/1 [00:00<00:00, 142.80it/s]\n", + "100%|██████████| 10/10 [00:00 Date: Thu, 28 Jul 2022 20:42:42 -0600 Subject: [PATCH 5/6] verbose kwarg in MPTSMetrics --- src/matbench_genmetrics/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matbench_genmetrics/core.py b/src/matbench_genmetrics/core.py index 44c4a61..64c685c 100644 --- a/src/matbench_genmetrics/core.py +++ b/src/matbench_genmetrics/core.py @@ -220,8 +220,9 @@ def metrics(self): class MPTSMetrics(GenMetrics): - def __init__(self, dummy=False): + def __init__(self, dummy=False, verbose=True): self.dummy = dummy + self.verbose = verbose self.mpt = MPTimeSplit(target="energy_above_hull") self.folds = self.mpt.folds self.recorded_metrics = [None] * len(self.folds) @@ -247,6 +248,7 @@ def evaluate_and_record(self, fold, gen_structures, test_pred_structures=None): self.val_inputs.tolist(), gen_structures, test_pred_structures=test_pred_structures, + verbose=self.verbose, ) self.recorded_metrics[fold] = self.metrics From ff2f238be3c76afa49fe6f7581e024da9542d5b4 Mon Sep 17 00:00:00 2001 From: sgbaird Date: Thu, 28 Jul 2022 20:43:57 -0600 Subject: [PATCH 6/6] only check against 3 structures --- tests/test_matbench_genmetrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_matbench_genmetrics.py b/tests/test_matbench_genmetrics.py index 4dda7b0..b47351a 100644 --- a/tests/test_matbench_genmetrics.py +++ b/tests/test_matbench_genmetrics.py @@ -109,14 +109,14 @@ def test_numerical_attributes(fixture: object, checkitem: Tuple[str, npt.ArrayLi def test_mpts_metrics(): - mptm = MPTSMetrics(dummy=True) + mptm = MPTSMetrics(dummy=True, verbose=False) for fold in mptm.folds: train_val_inputs = mptm.get_train_and_val_data(fold) np.random.seed(10) dg = DummyGenerator() dg.fit(train_val_inputs) - gen_structures = dg.gen(n=100) + gen_structures = dg.gen(n=3) mptm.evaluate_and_record(fold, gen_structures)