From 444eacc20e18edec472ad1a673b90f57dc60266d Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Tue, 19 Sep 2023 22:11:18 -0400 Subject: [PATCH] Fix TREC-COVID regressions (take 2) (#2201) --- .../generate_round4_doc2query_baselines.py | 10 +++++----- .../generate_round5_doc2query_baselines.py | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py index f80347d7e9..ad3ec5308c 100644 --- a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py +++ b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py @@ -251,21 +251,21 @@ def main(): {'topics': 45, 'ndcg_cut_10': 0.6618, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6331, 'judged_cut_20': 0.8444, 'map': 0.2974, 'recall_1000': 0.5847, 'judged_cut_1000': 0.3344}, 'expanded.anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 45, 'ndcg_cut_10': 0.7447, 'judged_cut_10': 0.8933, 'ndcg_cut_20': 0.7067, - 'judged_cut_20': 0.8589, 'map': 0.3182, 'recall_1000': 0.5812, 'judged_cut_1000': 0.2904}, + {'topics': 45, 'ndcg_cut_10': 0.7436, 'judged_cut_10': 0.8911, 'ndcg_cut_20': 0.7067, + 'judged_cut_20': 0.8600, 'map': 0.3182, 'recall_1000': 0.5811, 'judged_cut_1000': 0.2904}, } evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'expanded.anserini.final-r4.fusion1.txt': {'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222, 'ndcg_cut_20': 0.5115, - 'judged_cut_20': 0.6944, 'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424}, + 'judged_cut_20': 0.6944, 'map': 0.2497, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424}, 'expanded.anserini.final-r4.fusion2.txt': {'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444, 'ndcg_cut_20': 0.5175, 'judged_cut_20': 0.6911, 'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434}, 'expanded.anserini.final-r4.rf.txt': - {'topics': 45, 'ndcg_cut_10': 0.6062, 'judged_cut_10': 0.7378, 'ndcg_cut_20': 0.5606, - 'judged_cut_20': 0.6833, 'map': 0.2658, 'recall_1000': 0.6759, 'judged_cut_1000': 0.1284}, + {'topics': 45, 'ndcg_cut_10': 0.6086, 'judged_cut_10': 0.7400, 'ndcg_cut_20': 0.5619, + 'judged_cut_20': 0.6844, 'map': 0.2659, 'recall_1000': 0.6765, 'judged_cut_1000': 0.1284}, } evaluate_runs(round4_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag) diff --git a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py index de88990932..57b9e8d59f 100644 --- a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py +++ b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py @@ -240,7 +240,7 @@ def main(): 'judged_cut_20': 0.5120, 'map': 0.1728, 'recall_1000': 0.4462, 'judged_cut_1000': 0.2059}, 'expanded.anserini.covid-r5.abstract.qdel.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.4548, 'judged_cut_10': 0.5000, 'ndcg_cut_20': 0.4260, - 'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4527, 'judged_cut_1000': 0.2051}, + 'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4528, 'judged_cut_1000': 0.2051}, 'expanded.anserini.covid-r5.full-text.qq.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.4450, 'judged_cut_10': 0.6020, 'ndcg_cut_20': 0.4208, 'judged_cut_20': 0.5820, 'map': 0.1801, 'recall_1000': 0.4473, 'judged_cut_1000': 0.2393}, @@ -260,8 +260,8 @@ def main(): {'topics': 50, 'ndcg_cut_10': 0.4846, 'judged_cut_10': 0.5740, 'ndcg_cut_20': 0.4565, 'judged_cut_20': 0.5400, 'map': 0.2045, 'recall_1000': 0.5218, 'judged_cut_1000': 0.2578}, 'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.6095, 'judged_cut_10': 0.6320, 'ndcg_cut_20': 0.5693, - 'judged_cut_20': 0.5990, 'map': 0.2344, 'recall_1000': 0.5280, 'judged_cut_1000': 0.2257}, + {'topics': 50, 'ndcg_cut_10': 0.6121, 'judged_cut_10': 0.6340, 'ndcg_cut_20': 0.5705, + 'judged_cut_20': 0.6000, 'map': 0.2345, 'recall_1000': 0.5279, 'judged_cut_1000': 0.2255}, } evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) @@ -271,7 +271,7 @@ def main(): 'judged_cut_20': 0.9600, 'map': 0.2718, 'recall_1000': 0.4550, 'judged_cut_1000': 0.3845}, 'expanded.anserini.covid-r5.abstract.qdel.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.6939, 'judged_cut_10': 0.9920, 'ndcg_cut_20': 0.6524, - 'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4595, 'judged_cut_1000': 0.3825}, + 'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4596, 'judged_cut_1000': 0.3825}, 'expanded.anserini.covid-r5.full-text.qq.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.6300, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5843, 'judged_cut_20': 0.9260, 'map': 0.2475, 'recall_1000': 0.4201, 'judged_cut_1000': 0.3921}, @@ -291,8 +291,8 @@ def main(): {'topics': 50, 'ndcg_cut_10': 0.7131, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6755, 'judged_cut_20': 0.9910, 'map': 0.3036, 'recall_1000': 0.5166, 'judged_cut_1000': 0.4518}, 'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.8160, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7787, - 'judged_cut_20': 0.9960, 'map': 0.3421, 'recall_1000': 0.5249, 'judged_cut_1000': 0.4107}, + {'topics': 50, 'ndcg_cut_10': 0.8175, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7778, + 'judged_cut_20': 0.9950, 'map': 0.3421, 'recall_1000': 0.5250, 'judged_cut_1000': 0.4106}, } evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) @@ -310,8 +310,8 @@ def main(): {'topics': 50, 'ndcg_cut_10': 0.5825, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5436, 'judged_cut_20': 0.8700, 'map': 0.2319, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2138}, 'expanded.anserini.final-r5.rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.6628, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6040, - 'judged_cut_20': 0.8370, 'map': 0.2410, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1995}, + {'topics': 50, 'ndcg_cut_10': 0.6620, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6053, + 'judged_cut_20': 0.8380, 'map': 0.2409, 'recall_1000': 0.6034, 'judged_cut_1000': 0.1993}, 'expanded.anserini.final-r5.rf.post-processed.txt': {'topics': 50, 'ndcg_cut_10': 0.6757, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.6124, 'judged_cut_20': 0.8470, 'map': 0.2433, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1998},