From 444eacc20e18edec472ad1a673b90f57dc60266d Mon Sep 17 00:00:00 2001
From: Jimmy Lin <jimmylin@uwaterloo.ca>
Date: Tue, 19 Sep 2023 22:11:18 -0400
Subject: [PATCH] Fix TREC-COVID regressions (take 2) (#2201)

---
 .../generate_round4_doc2query_baselines.py       | 10 +++++-----
 .../generate_round5_doc2query_baselines.py       | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py
index f80347d7e9..ad3ec5308c 100644
--- a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py
+++ b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py
@@ -251,21 +251,21 @@ def main():
             {'topics': 45, 'ndcg_cut_10': 0.6618, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6331,
              'judged_cut_20': 0.8444, 'map': 0.2974, 'recall_1000': 0.5847, 'judged_cut_1000': 0.3344},
         'expanded.anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt':
-            {'topics': 45, 'ndcg_cut_10': 0.7447, 'judged_cut_10': 0.8933, 'ndcg_cut_20': 0.7067,
-             'judged_cut_20': 0.8589, 'map': 0.3182, 'recall_1000': 0.5812, 'judged_cut_1000': 0.2904},
+            {'topics': 45, 'ndcg_cut_10': 0.7436, 'judged_cut_10': 0.8911, 'ndcg_cut_20': 0.7067,
+             'judged_cut_20': 0.8600, 'map': 0.3182, 'recall_1000': 0.5811, 'judged_cut_1000': 0.2904},
     }
     evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)
 
     expected_metrics = {
         'expanded.anserini.final-r4.fusion1.txt':
             {'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222, 'ndcg_cut_20': 0.5115,
-             'judged_cut_20': 0.6944, 'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424},
+             'judged_cut_20': 0.6944, 'map': 0.2497, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424},
         'expanded.anserini.final-r4.fusion2.txt':
             {'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444, 'ndcg_cut_20': 0.5175,
              'judged_cut_20': 0.6911, 'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434},
         'expanded.anserini.final-r4.rf.txt':
-            {'topics': 45, 'ndcg_cut_10': 0.6062, 'judged_cut_10': 0.7378, 'ndcg_cut_20': 0.5606,
-             'judged_cut_20': 0.6833, 'map': 0.2658, 'recall_1000': 0.6759, 'judged_cut_1000': 0.1284},
+            {'topics': 45, 'ndcg_cut_10': 0.6086, 'judged_cut_10': 0.7400, 'ndcg_cut_20': 0.5619,
+             'judged_cut_20': 0.6844, 'map': 0.2659, 'recall_1000': 0.6765, 'judged_cut_1000': 0.1284},
     }
     evaluate_runs(round4_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)
 
diff --git a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py
index de88990932..57b9e8d59f 100644
--- a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py
+++ b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py
@@ -240,7 +240,7 @@ def main():
              'judged_cut_20': 0.5120, 'map': 0.1728, 'recall_1000': 0.4462, 'judged_cut_1000': 0.2059},
         'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
             {'topics': 50, 'ndcg_cut_10': 0.4548, 'judged_cut_10': 0.5000, 'ndcg_cut_20': 0.4260,
-             'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4527, 'judged_cut_1000': 0.2051},
+             'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4528, 'judged_cut_1000': 0.2051},
         'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
             {'topics': 50, 'ndcg_cut_10': 0.4450, 'judged_cut_10': 0.6020, 'ndcg_cut_20': 0.4208,
              'judged_cut_20': 0.5820, 'map': 0.1801, 'recall_1000': 0.4473, 'judged_cut_1000': 0.2393},
@@ -260,8 +260,8 @@ def main():
             {'topics': 50, 'ndcg_cut_10': 0.4846, 'judged_cut_10': 0.5740, 'ndcg_cut_20': 0.4565,
              'judged_cut_20': 0.5400, 'map': 0.2045, 'recall_1000': 0.5218, 'judged_cut_1000': 0.2578},
         'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
-            {'topics': 50, 'ndcg_cut_10': 0.6095, 'judged_cut_10': 0.6320, 'ndcg_cut_20': 0.5693,
-             'judged_cut_20': 0.5990, 'map': 0.2344, 'recall_1000': 0.5280, 'judged_cut_1000': 0.2257},
+            {'topics': 50, 'ndcg_cut_10': 0.6121, 'judged_cut_10': 0.6340, 'ndcg_cut_20': 0.5705,
+             'judged_cut_20': 0.6000, 'map': 0.2345, 'recall_1000': 0.5279, 'judged_cut_1000': 0.2255},
     }
     evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)
 
@@ -271,7 +271,7 @@ def main():
              'judged_cut_20': 0.9600, 'map': 0.2718, 'recall_1000': 0.4550, 'judged_cut_1000': 0.3845},
         'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
             {'topics': 50, 'ndcg_cut_10': 0.6939, 'judged_cut_10': 0.9920, 'ndcg_cut_20': 0.6524,
-             'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4595, 'judged_cut_1000': 0.3825},
+             'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4596, 'judged_cut_1000': 0.3825},
         'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
             {'topics': 50, 'ndcg_cut_10': 0.6300, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5843,
              'judged_cut_20': 0.9260, 'map': 0.2475, 'recall_1000': 0.4201, 'judged_cut_1000': 0.3921},
@@ -291,8 +291,8 @@ def main():
             {'topics': 50, 'ndcg_cut_10': 0.7131, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6755,
              'judged_cut_20': 0.9910, 'map': 0.3036, 'recall_1000': 0.5166, 'judged_cut_1000': 0.4518},
         'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
-            {'topics': 50, 'ndcg_cut_10': 0.8160, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7787,
-             'judged_cut_20': 0.9960, 'map': 0.3421, 'recall_1000': 0.5249, 'judged_cut_1000': 0.4107},
+            {'topics': 50, 'ndcg_cut_10': 0.8175, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7778,
+             'judged_cut_20': 0.9950, 'map': 0.3421, 'recall_1000': 0.5250, 'judged_cut_1000': 0.4106},
     }
     evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)
 
@@ -310,8 +310,8 @@ def main():
             {'topics': 50, 'ndcg_cut_10': 0.5825, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5436,
              'judged_cut_20': 0.8700, 'map': 0.2319, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2138},
         'expanded.anserini.final-r5.rf.txt':
-            {'topics': 50, 'ndcg_cut_10': 0.6628, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6040,
-             'judged_cut_20': 0.8370, 'map': 0.2410, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1995},
+            {'topics': 50, 'ndcg_cut_10': 0.6620, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6053,
+             'judged_cut_20': 0.8380, 'map': 0.2409, 'recall_1000': 0.6034, 'judged_cut_1000': 0.1993},
         'expanded.anserini.final-r5.rf.post-processed.txt':
             {'topics': 50, 'ndcg_cut_10': 0.6757, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.6124,
              'judged_cut_20': 0.8470, 'map': 0.2433, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1998},