From 88935fc9431dbb81d55883547c185c4d1f44bf36 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Mon, 18 Sep 2023 19:45:20 -0400 Subject: [PATCH] Fix TREC-COVID regressions (#2196) --- .../python/trec-covid/covid_baseline_tools.py | 14 ++-- .../python/trec-covid/generate_query_udel.py | 4 +- .../trec-covid/generate_round1_baselines.py | 14 ++-- .../trec-covid/generate_round2_baselines.py | 22 +++--- .../trec-covid/generate_round3_baselines.py | 74 +++++++++---------- .../trec-covid/generate_round4_baselines.py | 68 ++++++++--------- .../generate_round4_doc2query_baselines.py | 12 +-- .../trec-covid/generate_round5_baselines.py | 68 ++++++++--------- .../generate_round5_doc2query_baselines.py | 12 +-- src/main/python/trec-covid/index_cord19.py | 6 +- 10 files changed, 147 insertions(+), 147 deletions(-) diff --git a/src/main/python/trec-covid/covid_baseline_tools.py b/src/main/python/trec-covid/covid_baseline_tools.py index aa18f3c08f..013044b122 100644 --- a/src/main/python/trec-covid/covid_baseline_tools.py +++ b/src/main/python/trec-covid/covid_baseline_tools.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # - +import math import os import re import subprocess @@ -22,11 +22,11 @@ def perform_runs(round_number, indexes): - base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round{round_number}.xml' - udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round{round_number}-udel.xml' + base_topics = f'tools/topics-and-qrels/topics.covid-round{round_number}.xml' + udel_topics = f'tools/topics-and-qrels/topics.covid-round{round_number}-udel.xml' # Use cumulative qrels from previous round for relevance feedback runs - cumulative_qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt' + cumulative_qrels = f'tools/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt' print('') print('## Running on abstract index...') @@ -119,7 +119,7 @@ def perform_fusion(round_number, run_checksums, check_md5=True): def prepare_final_submissions(round_number, run_checksums, check_md5=True): # Remove the cumulative qrels from the previous round. - qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt' + qrels = f'tools/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt' print('') print('## Preparing final submission files by removing qrels...') @@ -235,8 +235,8 @@ def evaluate_runs(qrels, runs, expected={}, check_md5=True): for key in ['topics', 'ndcg_cut_10', 'judged_cut_10', 'ndcg_cut_20', 'judged_cut_20', 'map', 'recall_1000', 'judged_cut_1000']: if key in expected[run]: - assert metrics[key] == expected[run][key],\ - f'\'{key}\' doesn\'t match, expected {expected[run][key]} got {metrics[key]}!' + assert math.isclose(metrics[key], expected[run][key], rel_tol=1e-4), \ + f'\'{key}\' doesn\'t match, expected {expected[run][key]:.4f} got {metrics[key]:.4f}!' if check_md5: assert metrics['md5'] == runs[run], f'Error in producing {run}!' diff --git a/src/main/python/trec-covid/generate_query_udel.py b/src/main/python/trec-covid/generate_query_udel.py index 4aa89c50d7..1120f34844 100755 --- a/src/main/python/trec-covid/generate_query_udel.py +++ b/src/main/python/trec-covid/generate_query_udel.py @@ -95,14 +95,14 @@ original_query_file_name = f'topics.covid-round{args.round}.xml' original_query_file = os.path.join( args.anserini_root, - 'src/main/resources/topics-and-qrels', + 'tools/topics-and-qrels', original_query_file_name ) output_query_file_name = f'topics.covid-round{args.round}-udel.xml' output_query_file = os.path.join( args.anserini_root, - 'src/main/resources/topics-and-qrels', + 'tools/topics-and-qrels', output_query_file_name ) diff --git a/src/main/python/trec-covid/generate_round1_baselines.py b/src/main/python/trec-covid/generate_round1_baselines.py index a916cb6667..cfe89bedf9 100644 --- a/src/main/python/trec-covid/generate_round1_baselines.py +++ b/src/main/python/trec-covid/generate_round1_baselines.py @@ -54,8 +54,8 @@ def perform_runs(): - base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round1.xml' - udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round1-udel.xml' + base_topics = f'tools/topics-and-qrels/topics.covid-round1.xml' + udel_topics = f'tools/topics-and-qrels/topics.covid-round1-udel.xml' print('') print('## Running on abstract index...') @@ -178,7 +178,7 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt' + round1_qrels = 'tools/topics-and-qrels/qrels.covid-round1.txt' # Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669 # Thus, no point in checking MD5. @@ -189,7 +189,7 @@ def main(): expected_metrics = { 'anserini.covid-r1.abstract.query.bm25.txt': - {'ndcg_cut_10': 0.4100, 'judged_cut_10': 0.8267, 'recall_1000': 0.5279}, + {'ndcg_cut_10': 0.4100, 'judged_cut_10': 0.8267, 'recall_1000': 0.5285}, 'anserini.covid-r1.abstract.question.bm25.txt': {'ndcg_cut_10': 0.5179, 'judged_cut_10': 0.9833, 'recall_1000': 0.6313}, 'anserini.covid-r1.abstract.query+question.bm25.txt': @@ -213,9 +213,9 @@ def main(): 'anserini.covid-r1.full-text.query-covid19.bm25.txt': {'ndcg_cut_10': 0.2434, 'judged_cut_10': 0.5233, 'recall_1000': 0.5692}, 'anserini.covid-r1.paragraph.query.bm25.txt': - {'ndcg_cut_10': 0.4302, 'judged_cut_10': 0.8400, 'recall_1000': 0.4327}, + {'ndcg_cut_10': 0.4303, 'judged_cut_10': 0.8400, 'recall_1000': 0.4324}, 'anserini.covid-r1.paragraph.question.bm25.txt': - {'ndcg_cut_10': 0.4410, 'judged_cut_10': 0.9167, 'recall_1000': 0.5111}, + {'ndcg_cut_10': 0.4410, 'judged_cut_10': 0.9167, 'recall_1000': 0.5108}, 'anserini.covid-r1.paragraph.query+question.bm25.txt': {'ndcg_cut_10': 0.5450, 'judged_cut_10': 0.9733, 'recall_1000': 0.5743}, 'anserini.covid-r1.paragraph.query+question+narrative.bm25.txt': @@ -223,7 +223,7 @@ def main(): 'anserini.covid-r1.paragraph.query-udel.bm25.txt': {'ndcg_cut_10': 0.5544, 'judged_cut_10': 0.9200, 'recall_1000': 0.5640}, 'anserini.covid-r1.paragraph.query-covid19.bm25.txt': - {'ndcg_cut_10': 0.3180, 'judged_cut_10': 0.5333, 'recall_1000': 0.3552}, + {'ndcg_cut_10': 0.3180, 'judged_cut_10': 0.5300, 'recall_1000': 0.3552}, 'anserini.covid-r1.fusion1.txt': {'ndcg_cut_10': 0.5716, 'judged_cut_10': 0.9867, 'recall_1000': 0.8122}, 'anserini.covid-r1.fusion2.txt': diff --git a/src/main/python/trec-covid/generate_round2_baselines.py b/src/main/python/trec-covid/generate_round2_baselines.py index 15bdefd667..4321525a5a 100644 --- a/src/main/python/trec-covid/generate_round2_baselines.py +++ b/src/main/python/trec-covid/generate_round2_baselines.py @@ -50,8 +50,8 @@ def perform_runs(): - base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round2.xml' - udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round2-udel.xml' + base_topics = f'tools/topics-and-qrels/topics.covid-round2.xml' + udel_topics = f'tools/topics-and-qrels/topics.covid-round2-udel.xml' print('') print('## Running on abstract index...') @@ -135,7 +135,7 @@ def perform_fusion(run_checksums, check_md5=True): def prepare_final_submissions(run_checksums, check_md5=True): # Remove the cumulative qrels from the previous round. - qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round1.txt' + qrels = f'tools/topics-and-qrels/qrels.covid-round1.txt' print('') print('## Preparing final submission files by removing qrels...') @@ -170,8 +170,8 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt' - round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt' + round1_qrels = 'tools/topics-and-qrels/qrels.covid-round1.txt' + round2_qrels = 'tools/topics-and-qrels/qrels.covid-round2.txt' # Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669 # Thus, no point in checking MD5. @@ -183,11 +183,11 @@ def main(): expected_metrics = { 'anserini.covid-r2.abstract.qq.bm25.txt': - {'topics': 35, 'ndcg_cut_10': 0.3522, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3171, + {'topics': 35, 'ndcg_cut_10': 0.3521, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3170, 'judged_cut_20': 0.5100, 'map': 0.1752, 'recall_1000': 0.6601, 'judged_cut_1000': 0.1013}, 'anserini.covid-r2.abstract.qdel.bm25.txt': {'topics': 35, 'ndcg_cut_10': 0.3781, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3462, - 'judged_cut_20': 0.4829, 'map': 0.1804, 'recall_1000': 0.6485, 'judged_cut_1000': 0.0958}, + 'judged_cut_20': 0.4829, 'map': 0.1803, 'recall_1000': 0.6485, 'judged_cut_1000': 0.0958}, 'anserini.covid-r2.full-text.qq.bm25.txt': {'topics': 35, 'ndcg_cut_10': 0.2070, 'judged_cut_10': 0.4286, 'ndcg_cut_20': 0.1931, 'judged_cut_20': 0.3929, 'map': 0.1159, 'recall_1000': 0.5953, 'judged_cut_1000': 0.0995}, @@ -195,11 +195,11 @@ def main(): {'topics': 35, 'ndcg_cut_10': 0.3123, 'judged_cut_10': 0.4229, 'ndcg_cut_20': 0.2738, 'judged_cut_20': 0.3929, 'map': 0.1473, 'recall_1000': 0.6517, 'judged_cut_1000': 0.1022}, 'anserini.covid-r2.paragraph.qq.bm25.txt': - {'topics': 35, 'ndcg_cut_10': 0.2772, 'judged_cut_10': 0.4400, 'ndcg_cut_20': 0.2579, + {'topics': 35, 'ndcg_cut_10': 0.2770, 'judged_cut_10': 0.4400, 'ndcg_cut_20': 0.2578, 'judged_cut_20': 0.4529, 'map': 0.1607, 'recall_1000': 0.7248, 'judged_cut_1000': 0.1220}, 'anserini.covid-r2.paragraph.qdel.bm25.txt': - {'topics': 35, 'ndcg_cut_10': 0.3353, 'judged_cut_10': 0.4343, 'ndcg_cut_20': 0.2956, - 'judged_cut_20': 0.4329, 'map': 0.1772, 'recall_1000': 0.7196, 'judged_cut_1000': 0.1136}, + {'topics': 35, 'ndcg_cut_10': 0.3350, 'judged_cut_10': 0.4343, 'ndcg_cut_20': 0.2954, + 'judged_cut_20': 0.4329, 'map': 0.1772, 'recall_1000': 0.7196, 'judged_cut_1000': 0.1137}, 'anserini.covid-r2.fusion1.txt': {'topics': 35, 'ndcg_cut_10': 0.3297, 'judged_cut_10': 0.4657, 'ndcg_cut_20': 0.3060, 'judged_cut_20': 0.4643, 'map': 0.1914, 'recall_1000': 0.7561, 'judged_cut_1000': 0.1304}, @@ -216,7 +216,7 @@ def main(): {'topics': 35, 'ndcg_cut_10': 0.4827, 'judged_cut_10': 0.9543, 'ndcg_cut_20': 0.4512, 'judged_cut_20': 0.8614, 'map': 0.2431, 'recall_1000': 0.6475, 'judged_cut_1000': 0.1463}, 'anserini.final-r2.fusion2.txt': - {'topics': 35, 'ndcg_cut_10': 0.5553, 'judged_cut_10': 0.9743, 'ndcg_cut_20': 0.5058, + {'topics': 35, 'ndcg_cut_10': 0.5553, 'judged_cut_10': 0.9714, 'ndcg_cut_20': 0.5058, 'judged_cut_20': 0.8957, 'map': 0.2739, 'recall_1000': 0.6832, 'judged_cut_1000': 0.1528}, } evaluate_runs(round2_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag) diff --git a/src/main/python/trec-covid/generate_round3_baselines.py b/src/main/python/trec-covid/generate_round3_baselines.py index 40032370ca..9310ec3bed 100644 --- a/src/main/python/trec-covid/generate_round3_baselines.py +++ b/src/main/python/trec-covid/generate_round3_baselines.py @@ -89,13 +89,13 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - os.system('cat src/main/resources/topics-and-qrels/qrels.covid-round1.txt ' + - 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt ' + - '> src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt') + os.system('cat tools/topics-and-qrels/qrels.covid-round1.txt ' + + 'tools/topics-and-qrels/qrels.covid-round2.txt ' + + '> tools/topics-and-qrels/qrels.covid-round2-cumulative.txt') - round2_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2-cumulative.txt' - round3_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3.txt' - round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt' + round2_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round2-cumulative.txt' + round3_qrels = 'tools/topics-and-qrels/qrels.covid-round3.txt' + round3_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round3-cumulative.txt' # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669 check_md5_flag = False @@ -107,11 +107,11 @@ def main(): expected_metrics = { 'anserini.covid-r3.abstract.qq.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.2117, 'judged_cut_10': 0.3300, 'ndcg_cut_20': 0.2043, - 'judged_cut_20': 0.3150, 'map': 0.0951, 'recall_1000': 0.4398, 'judged_cut_1000': 0.1275}, + {'topics': 40, 'ndcg_cut_10': 0.2136, 'judged_cut_10': 0.3275, 'ndcg_cut_20': 0.2040, + 'judged_cut_20': 0.3138, 'map': 0.0950, 'recall_1000': 0.4398, 'judged_cut_1000': 0.1275}, 'anserini.covid-r3.abstract.qdel.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.2466, 'judged_cut_10': 0.3375, 'ndcg_cut_20': 0.2253, - 'judged_cut_20': 0.3175, 'map': 0.1023, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1248}, + {'topics': 40, 'ndcg_cut_10': 0.2478, 'judged_cut_10': 0.3325, 'ndcg_cut_20': 0.2283, + 'judged_cut_20': 0.3175, 'map': 0.1025, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1249}, 'anserini.covid-r3.full-text.qq.bm25.txt': {'topics': 40, 'ndcg_cut_10': 0.2337, 'judged_cut_10': 0.4650, 'ndcg_cut_20': 0.2259, 'judged_cut_20': 0.4425, 'map': 0.1099, 'recall_1000': 0.4817, 'judged_cut_1000': 0.1490}, @@ -119,30 +119,30 @@ def main(): {'topics': 40, 'ndcg_cut_10': 0.3430, 'judged_cut_10': 0.5025, 'ndcg_cut_20': 0.3077, 'judged_cut_20': 0.4888, 'map': 0.1426, 'recall_1000': 0.5267, 'judged_cut_1000': 0.1575}, 'anserini.covid-r3.paragraph.qq.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.2848, 'judged_cut_10': 0.5175, 'ndcg_cut_20': 0.2734, - 'judged_cut_20': 0.4938, 'map': 0.1390, 'recall_1000': 0.5527, 'judged_cut_1000': 0.1727}, + {'topics': 40, 'ndcg_cut_10': 0.2843, 'judged_cut_10': 0.5175, 'ndcg_cut_20': 0.2731, + 'judged_cut_20': 0.4938, 'map': 0.1389, 'recall_1000': 0.5527, 'judged_cut_1000': 0.1727}, 'anserini.covid-r3.paragraph.qdel.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.3604, 'judged_cut_10': 0.5050, 'ndcg_cut_20': 0.3213, - 'judged_cut_20': 0.4875, 'map': 0.1520, 'recall_1000': 0.5676, 'judged_cut_1000': 0.1672}, + {'topics': 40, 'ndcg_cut_10': 0.3598, 'judged_cut_10': 0.5050, 'ndcg_cut_20': 0.3209, + 'judged_cut_20': 0.4875, 'map': 0.1519, 'recall_1000': 0.5676, 'judged_cut_1000': 0.1672}, 'anserini.covid-r3.fusion1.txt': - {'topics': 40, 'ndcg_cut_10': 0.3093, 'judged_cut_10': 0.4975, 'ndcg_cut_20': 0.2933, - 'judged_cut_20': 0.5025, 'map': 0.1400, 'recall_1000': 0.5566, 'judged_cut_1000': 0.1750}, + {'topics': 40, 'ndcg_cut_10': 0.3065, 'judged_cut_10': 0.4950, 'ndcg_cut_20': 0.2923, + 'judged_cut_20': 0.5012, 'map': 0.1399, 'recall_1000': 0.5566, 'judged_cut_1000': 0.1750}, 'anserini.covid-r3.fusion2.txt': - {'topics': 40, 'ndcg_cut_10': 0.3568, 'judged_cut_10': 0.5250, 'ndcg_cut_20': 0.3273, - 'judged_cut_20': 0.4925, 'map': 0.1564, 'recall_1000': 0.5769, 'judged_cut_1000': 0.1715}, + {'topics': 40, 'ndcg_cut_10': 0.3559, 'judged_cut_10': 0.5250, 'ndcg_cut_20': 0.3263, + 'judged_cut_20': 0.4925, 'map': 0.1563, 'recall_1000': 0.5769, 'judged_cut_1000': 0.1715}, 'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 40, 'ndcg_cut_10': 0.3633, 'judged_cut_10': 0.3800, 'ndcg_cut_20': 0.3175, - 'judged_cut_20': 0.3600, 'map': 0.1526, 'recall_1000': 0.5722, 'judged_cut_1000': 0.1398}, + {'topics': 40, 'ndcg_cut_10': 0.3616, 'judged_cut_10': 0.3775, 'ndcg_cut_20': 0.3174, + 'judged_cut_20': 0.3575, 'map': 0.1525, 'recall_1000': 0.5715, 'judged_cut_1000': 0.1398}, } evaluate_runs(round2_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.covid-r3.abstract.qq.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.5780, 'judged_cut_10': 0.8875, 'ndcg_cut_20': 0.5359, - 'judged_cut_20': 0.8325, 'map': 0.2348, 'recall_1000': 0.5040, 'judged_cut_1000': 0.2351}, + {'topics': 40, 'ndcg_cut_10': 0.5804, 'judged_cut_10': 0.8875, 'ndcg_cut_20': 0.5377, + 'judged_cut_20': 0.8325, 'map': 0.2347, 'recall_1000': 0.5039, 'judged_cut_1000': 0.2351}, 'anserini.covid-r3.abstract.qdel.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.6289, 'judged_cut_10': 0.9300, 'ndcg_cut_20': 0.5971, - 'judged_cut_20': 0.8925, 'map': 0.2525, 'recall_1000': 0.5215, 'judged_cut_1000': 0.2370}, + {'topics': 40, 'ndcg_cut_10': 0.6295, 'judged_cut_10': 0.9275, 'ndcg_cut_20': 0.5989, + 'judged_cut_20': 0.8912, 'map': 0.2524, 'recall_1000': 0.5214, 'judged_cut_1000': 0.2370}, 'anserini.covid-r3.full-text.qq.bm25.txt': {'topics': 40, 'ndcg_cut_10': 0.3977, 'judged_cut_10': 0.7500, 'ndcg_cut_20': 0.3681, 'judged_cut_20': 0.7213, 'map': 0.1646, 'recall_1000': 0.4708, 'judged_cut_1000': 0.2471}, @@ -150,39 +150,39 @@ def main(): {'topics': 40, 'ndcg_cut_10': 0.5790, 'judged_cut_10': 0.9050, 'ndcg_cut_20': 0.5234, 'judged_cut_20': 0.8525, 'map': 0.2236, 'recall_1000': 0.5313, 'judged_cut_1000': 0.2693}, 'anserini.covid-r3.paragraph.qq.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.5396, 'judged_cut_10': 0.9425, 'ndcg_cut_20': 0.5079, + {'topics': 40, 'ndcg_cut_10': 0.5397, 'judged_cut_10': 0.9425, 'ndcg_cut_20': 0.5079, 'judged_cut_20': 0.9050, 'map': 0.2498, 'recall_1000': 0.5766, 'judged_cut_1000': 0.2978}, 'anserini.covid-r3.paragraph.qdel.bm25.txt': - {'topics': 40, 'ndcg_cut_10': 0.6327, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5793, + {'topics': 40, 'ndcg_cut_10': 0.6328, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5793, 'judged_cut_20': 0.9162, 'map': 0.2753, 'recall_1000': 0.5923, 'judged_cut_1000': 0.2956}, 'anserini.covid-r3.fusion1.txt': - {'topics': 40, 'ndcg_cut_10': 0.5924, 'judged_cut_10': 0.9625, 'ndcg_cut_20': 0.5563, + {'topics': 40, 'ndcg_cut_10': 0.5917, 'judged_cut_10': 0.9625, 'ndcg_cut_20': 0.5559, 'judged_cut_20': 0.9362, 'map': 0.2700, 'recall_1000': 0.5956, 'judged_cut_1000': 0.3045}, 'anserini.covid-r3.fusion2.txt': - {'topics': 40, 'ndcg_cut_10': 0.6515, 'judged_cut_10': 0.9875, 'ndcg_cut_20': 0.6200, - 'judged_cut_20': 0.9675, 'map': 0.3027, 'recall_1000': 0.6194, 'judged_cut_1000': 0.3076}, + {'topics': 40, 'ndcg_cut_10': 0.6508, 'judged_cut_10': 0.9875, 'ndcg_cut_20': 0.6184, + 'judged_cut_20': 0.9663, 'map': 0.3026, 'recall_1000': 0.6194, 'judged_cut_1000': 0.3076}, 'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 40, 'ndcg_cut_10': 0.7459, 'judged_cut_10': 0.9875, 'ndcg_cut_20': 0.7023, - 'judged_cut_20': 0.9637, 'map': 0.3190, 'recall_1000': 0.6125, 'judged_cut_1000': 0.2600}, + {'topics': 40, 'ndcg_cut_10': 0.7411, 'judged_cut_10': 0.9850, 'ndcg_cut_20': 0.7007, + 'judged_cut_20': 0.9637, 'map': 0.3186, 'recall_1000': 0.6113, 'judged_cut_1000': 0.2598}, } evaluate_runs(round3_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.final-r3.fusion1.txt': - {'topics': 40, 'ndcg_cut_10': 0.5339, 'judged_cut_10': 0.8400, 'ndcg_cut_20': 0.4875, - 'judged_cut_20': 0.7637, 'map': 0.2283, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1370}, + {'topics': 40, 'ndcg_cut_10': 0.5337, 'judged_cut_10': 0.8400, 'ndcg_cut_20': 0.4874, + 'judged_cut_20': 0.7637, 'map': 0.2284, 'recall_1000': 0.6158, 'judged_cut_1000': 0.1370}, 'anserini.final-r3.fusion1.post-processed.txt': {'topics': 40, 'ndcg_cut_10': 0.5359, 'judged_cut_10': 0.8475, 'ndcg_cut_20': 0.4902, 'judged_cut_20': 0.7675, 'map': 0.2293, 'recall_1000': 0.6160, 'judged_cut_1000': 0.1373}, 'anserini.final-r3.fusion2.txt': - {'topics': 40, 'ndcg_cut_10': 0.6072, 'judged_cut_10': 0.9025, 'ndcg_cut_20': 0.5599, - 'judged_cut_20': 0.8337, 'map': 0.2631, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1431}, + {'topics': 40, 'ndcg_cut_10': 0.6058, 'judged_cut_10': 0.9025, 'ndcg_cut_20': 0.5596, + 'judged_cut_20': 0.8350, 'map': 0.2631, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1431}, 'anserini.final-r3.fusion2.post-processed.txt': {'topics': 40, 'ndcg_cut_10': 0.6100, 'judged_cut_10': 0.9100, 'ndcg_cut_20': 0.5617, 'judged_cut_20': 0.8375, 'map': 0.2641, 'recall_1000': 0.6441, 'judged_cut_1000': 0.1434}, 'anserini.final-r3.rf.txt': - {'topics': 40, 'ndcg_cut_10': 0.6812, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.6255, - 'judged_cut_20': 0.8450, 'map': 0.2787, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1246}, + {'topics': 40, 'ndcg_cut_10': 0.6793, 'judged_cut_10': 0.9575, 'ndcg_cut_20': 0.6251, + 'judged_cut_20': 0.8500, 'map': 0.2783, 'recall_1000': 0.6390, 'judged_cut_1000': 0.1247}, 'anserini.final-r3.rf.post-processed.txt': {'topics': 40, 'ndcg_cut_10': 0.6883, 'judged_cut_10': 0.9750, 'ndcg_cut_20': 0.6321, 'judged_cut_20': 0.8538, 'map': 0.2817, 'recall_1000': 0.6399, 'judged_cut_1000': 0.1250}, diff --git a/src/main/python/trec-covid/generate_round4_baselines.py b/src/main/python/trec-covid/generate_round4_baselines.py index 2c7c0562e8..c869d5d9d7 100644 --- a/src/main/python/trec-covid/generate_round4_baselines.py +++ b/src/main/python/trec-covid/generate_round4_baselines.py @@ -89,9 +89,9 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt' - round4_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4.txt' - round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt' + round3_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round3-cumulative.txt' + round4_qrels = 'tools/topics-and-qrels/qrels.covid-round4.txt' + round4_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round4-cumulative.txt' # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669 check_md5_flag = False @@ -103,11 +103,11 @@ def main(): expected_metrics = { 'anserini.covid-r4.abstract.qq.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.3143, 'judged_cut_10': 0.4467, 'ndcg_cut_20': 0.2993, - 'judged_cut_20': 0.4367, 'map': 0.1296, 'recall_1000': 0.4257, 'judged_cut_1000': 0.1672}, + {'topics': 45, 'ndcg_cut_10': 0.3084, 'judged_cut_10': 0.4444, 'ndcg_cut_20': 0.2964, + 'judged_cut_20': 0.4367, 'map': 0.1292, 'recall_1000': 0.4257, 'judged_cut_1000': 0.1671}, 'anserini.covid-r4.abstract.qdel.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.3260, 'judged_cut_10': 0.4378, 'ndcg_cut_20': 0.3188, - 'judged_cut_20': 0.4511, 'map': 0.1383, 'recall_1000': 0.4432, 'judged_cut_1000': 0.1706}, + {'topics': 45, 'ndcg_cut_10': 0.3230, 'judged_cut_10': 0.4378, 'ndcg_cut_20': 0.3159, + 'judged_cut_20': 0.4489, 'map': 0.1381, 'recall_1000': 0.4433, 'judged_cut_1000': 0.1706}, 'anserini.covid-r4.full-text.qq.bm25.txt': {'topics': 45, 'ndcg_cut_10': 0.2108, 'judged_cut_10': 0.4044, 'ndcg_cut_20': 0.2119, 'judged_cut_20': 0.4011, 'map': 0.1007, 'recall_1000': 0.3891, 'judged_cut_1000': 0.1776}, @@ -115,30 +115,30 @@ def main(): {'topics': 45, 'ndcg_cut_10': 0.3499, 'judged_cut_10': 0.5067, 'ndcg_cut_20': 0.3260, 'judged_cut_20': 0.4833, 'map': 0.1399, 'recall_1000': 0.4537, 'judged_cut_1000': 0.1952}, 'anserini.covid-r4.paragraph.qq.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.3229, 'judged_cut_10': 0.5267, 'ndcg_cut_20': 0.3072, - 'judged_cut_20': 0.5022, 'map': 0.1481, 'recall_1000': 0.4863, 'judged_cut_1000': 0.2110}, + {'topics': 45, 'ndcg_cut_10': 0.3228, 'judged_cut_10': 0.5267, 'ndcg_cut_20': 0.3072, + 'judged_cut_20': 0.5022, 'map': 0.1480, 'recall_1000': 0.4863, 'judged_cut_1000': 0.2110}, 'anserini.covid-r4.paragraph.qdel.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.4016, 'judged_cut_10': 0.5333, 'ndcg_cut_20': 0.3666, - 'judged_cut_20': 0.4978, 'map': 0.1647, 'recall_1000': 0.5050, 'judged_cut_1000': 0.2098}, + {'topics': 45, 'ndcg_cut_10': 0.4011, 'judged_cut_10': 0.5333, 'ndcg_cut_20': 0.3663, + 'judged_cut_20': 0.4978, 'map': 0.1647, 'recall_1000': 0.5050, 'judged_cut_1000': 0.2099}, 'anserini.covid-r4.fusion1.txt': - {'topics': 45, 'ndcg_cut_10': 0.3424, 'judged_cut_10': 0.5289, 'ndcg_cut_20': 0.3345, - 'judged_cut_20': 0.5278, 'map': 0.1495, 'recall_1000': 0.5033, 'judged_cut_1000': 0.2160}, + {'topics': 45, 'ndcg_cut_10': 0.3446, 'judged_cut_10': 0.5289, 'ndcg_cut_20': 0.3358, + 'judged_cut_20': 0.5289, 'map': 0.1496, 'recall_1000': 0.5033, 'judged_cut_1000': 0.2160}, 'anserini.covid-r4.fusion2.txt': - {'topics': 45, 'ndcg_cut_10': 0.4004, 'judged_cut_10': 0.5400, 'ndcg_cut_20': 0.3743, - 'judged_cut_20': 0.5111, 'map': 0.1713, 'recall_1000': 0.5291, 'judged_cut_1000': 0.2186}, + {'topics': 45, 'ndcg_cut_10': 0.3987, 'judged_cut_10': 0.5378, 'ndcg_cut_20': 0.3743, + 'judged_cut_20': 0.5100, 'map': 0.1712, 'recall_1000': 0.5291, 'judged_cut_1000': 0.2186}, 'anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 45, 'ndcg_cut_10': 0.4598, 'judged_cut_10': 0.5044, 'ndcg_cut_20': 0.4404, - 'judged_cut_20': 0.5167, 'map': 0.1909, 'recall_1000': 0.5330, 'judged_cut_1000': 0.1889}, + {'topics': 45, 'ndcg_cut_10': 0.4521, 'judged_cut_10': 0.5000, 'ndcg_cut_20': 0.4388, + 'judged_cut_20': 0.5167, 'map': 0.1904, 'recall_1000': 0.5322, 'judged_cut_1000': 0.1888}, } evaluate_runs(round3_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.covid-r4.abstract.qq.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.6600, 'judged_cut_10': 0.9356, 'ndcg_cut_20': 0.6120, - 'judged_cut_20': 0.9111, 'map': 0.2780, 'recall_1000': 0.5019, 'judged_cut_1000': 0.2876}, + {'topics': 45, 'ndcg_cut_10': 0.6619, 'judged_cut_10': 0.9356, 'ndcg_cut_20': 0.6147, + 'judged_cut_20': 0.9100, 'map': 0.2779, 'recall_1000': 0.5018, 'judged_cut_1000': 0.2876}, 'anserini.covid-r4.abstract.qdel.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.7081, 'judged_cut_10': 0.9844, 'ndcg_cut_20': 0.6650, - 'judged_cut_20': 0.9622, 'map': 0.2994, 'recall_1000': 0.5233, 'judged_cut_1000': 0.2987}, + {'topics': 45, 'ndcg_cut_10': 0.7087, 'judged_cut_10': 0.9844, 'ndcg_cut_20': 0.6662, + 'judged_cut_20': 0.9633, 'map': 0.2993, 'recall_1000': 0.5233, 'judged_cut_1000': 0.2987}, 'anserini.covid-r4.full-text.qq.bm25.txt': {'topics': 45, 'ndcg_cut_10': 0.4192, 'judged_cut_10': 0.8067, 'ndcg_cut_20': 0.3984, 'judged_cut_20': 0.7544, 'map': 0.1712, 'recall_1000': 0.4139, 'judged_cut_1000': 0.2740}, @@ -149,36 +149,36 @@ def main(): {'topics': 45, 'ndcg_cut_10': 0.5610, 'judged_cut_10': 0.9133, 'ndcg_cut_20': 0.5324, 'judged_cut_20': 0.8756, 'map': 0.2713, 'recall_1000': 0.5385, 'judged_cut_1000': 0.3386}, 'anserini.covid-r4.paragraph.qdel.bm25.txt': - {'topics': 45, 'ndcg_cut_10': 0.6477, 'judged_cut_10': 0.9644, 'ndcg_cut_20': 0.6084, + {'topics': 45, 'ndcg_cut_10': 0.6476, 'judged_cut_10': 0.9644, 'ndcg_cut_20': 0.6084, 'judged_cut_20': 0.9322, 'map': 0.2975, 'recall_1000': 0.5625, 'judged_cut_1000': 0.3443}, 'anserini.covid-r4.fusion1.txt': - {'topics': 45, 'ndcg_cut_10': 0.6271, 'judged_cut_10': 0.9689, 'ndcg_cut_20': 0.5968, - 'judged_cut_20': 0.9422, 'map': 0.2904, 'recall_1000': 0.5623, 'judged_cut_1000': 0.3519}, + {'topics': 45, 'ndcg_cut_10': 0.6279, 'judged_cut_10': 0.9689, 'ndcg_cut_20': 0.5969, + 'judged_cut_20': 0.9422, 'map': 0.2904, 'recall_1000': 0.5623, 'judged_cut_1000': 0.3520}, 'anserini.covid-r4.fusion2.txt': - {'topics': 45, 'ndcg_cut_10': 0.6802, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6573, - 'judged_cut_20': 0.9956, 'map': 0.3286, 'recall_1000': 0.5946, 'judged_cut_1000': 0.3625}, + {'topics': 45, 'ndcg_cut_10': 0.6803, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6577, + 'judged_cut_20': 0.9956, 'map': 0.3286, 'recall_1000': 0.5947, 'judged_cut_1000': 0.3626}, 'anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 45, 'ndcg_cut_10': 0.8056, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7649, - 'judged_cut_20': 0.9967, 'map': 0.3663, 'recall_1000': 0.5955, 'judged_cut_1000': 0.3229}, + {'topics': 45, 'ndcg_cut_10': 0.8057, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7661, + 'judged_cut_20': 0.9978, 'map': 0.3661, 'recall_1000': 0.5951, 'judged_cut_1000': 0.3229}, } evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.final-r4.fusion1.txt': - {'topics': 45, 'ndcg_cut_10': 0.5629, 'judged_cut_10': 0.8578, 'ndcg_cut_20': 0.5204, - 'judged_cut_20': 0.7922, 'map': 0.2656, 'recall_1000': 0.6571, 'judged_cut_1000': 0.1474}, + {'topics': 45, 'ndcg_cut_10': 0.5632, 'judged_cut_10': 0.8578, 'ndcg_cut_20': 0.5211, + 'judged_cut_20': 0.7933, 'map': 0.2657, 'recall_1000': 0.6573, 'judged_cut_1000': 0.1474}, 'anserini.final-r4.fusion1.post-processed.txt': {'topics': 45, 'ndcg_cut_10': 0.5658, 'judged_cut_10': 0.8578, 'ndcg_cut_20': 0.5244, 'judged_cut_20': 0.7978, 'map': 0.2666, 'recall_1000': 0.6571, 'judged_cut_1000': 0.1475}, 'anserini.final-r4.fusion2.txt': - {'topics': 45, 'ndcg_cut_10': 0.6376, 'judged_cut_10': 0.9778, 'ndcg_cut_20': 0.6047, - 'judged_cut_20': 0.8978, 'map': 0.3078, 'recall_1000': 0.6928, 'judged_cut_1000': 0.1558}, + {'topics': 45, 'ndcg_cut_10': 0.6389, 'judged_cut_10': 0.9778, 'ndcg_cut_20': 0.6054, + 'judged_cut_20': 0.8989, 'map': 0.3077, 'recall_1000': 0.6928, 'judged_cut_1000': 0.1559}, 'anserini.final-r4.fusion2.post-processed.txt': {'topics': 45, 'ndcg_cut_10': 0.6428, 'judged_cut_10': 0.9844, 'ndcg_cut_20': 0.6089, 'judged_cut_20': 0.9022, 'map': 0.3088, 'recall_1000': 0.6928, 'judged_cut_1000': 0.1559}, 'anserini.final-r4.rf.txt': - {'topics': 45, 'ndcg_cut_10': 0.7472, 'judged_cut_10': 0.9778, 'ndcg_cut_20': 0.6940, - 'judged_cut_20': 0.9233, 'map': 0.3506, 'recall_1000': 0.6962, 'judged_cut_1000': 0.1408}, + {'topics': 45, 'ndcg_cut_10': 0.7471, 'judged_cut_10': 0.9756, 'ndcg_cut_20': 0.6960, + 'judged_cut_20': 0.9244, 'map': 0.3504, 'recall_1000': 0.6960, 'judged_cut_1000': 0.1408}, 'anserini.final-r4.rf.post-processed.txt': {'topics': 45, 'ndcg_cut_10': 0.7516, 'judged_cut_10': 0.9867, 'ndcg_cut_20': 0.6976, 'judged_cut_20': 0.9278, 'map': 0.3519, 'recall_1000': 0.6962, 'judged_cut_1000': 0.1409}, diff --git a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py index 21d67a9877..f80347d7e9 100644 --- a/src/main/python/trec-covid/generate_round4_doc2query_baselines.py +++ b/src/main/python/trec-covid/generate_round4_doc2query_baselines.py @@ -78,8 +78,8 @@ def perform_runs(cumulative_qrels): - base_topics = 'src/main/resources/topics-and-qrels/topics.covid-round4.xml' - udel_topics = 'src/main/resources/topics-and-qrels/topics.covid-round4-udel.xml' + base_topics = 'tools/topics-and-qrels/topics.covid-round4.xml' + udel_topics = 'tools/topics-and-qrels/topics.covid-round4-udel.xml' print('') print('## Running on abstract index...') @@ -213,9 +213,9 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round3_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt' - round4_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4.txt' - round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt' + round3_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round3-cumulative.txt' + round4_qrels = 'tools/topics-and-qrels/qrels.covid-round4.txt' + round4_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round4-cumulative.txt' # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669 check_md5_flag = False @@ -259,7 +259,7 @@ def main(): expected_metrics = { 'expanded.anserini.final-r4.fusion1.txt': {'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222, 'ndcg_cut_20': 0.5115, - 'judged_cut_20': 0.6944, 'map': 0.2497, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424}, + 'judged_cut_20': 0.6944, 'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424}, 'expanded.anserini.final-r4.fusion2.txt': {'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444, 'ndcg_cut_20': 0.5175, 'judged_cut_20': 0.6911, 'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434}, diff --git a/src/main/python/trec-covid/generate_round5_baselines.py b/src/main/python/trec-covid/generate_round5_baselines.py index 7f569de44d..cf6f8f0055 100644 --- a/src/main/python/trec-covid/generate_round5_baselines.py +++ b/src/main/python/trec-covid/generate_round5_baselines.py @@ -89,9 +89,9 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt' - complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt' - round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt' + round4_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round4-cumulative.txt' + complete_qrels = 'tools/topics-and-qrels/qrels.covid-complete.txt' + round5_qrels = 'tools/topics-and-qrels/qrels.covid-round5.txt' # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669 check_md5_flag = False @@ -103,11 +103,11 @@ def main(): expected_metrics = { 'anserini.covid-r5.abstract.qq.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.4580, 'judged_cut_10': 0.5880, 'ndcg_cut_20': 0.4379, - 'judged_cut_20': 0.5940, 'map': 0.1903, 'recall_1000': 0.4525, 'judged_cut_1000': 0.2264}, + {'topics': 50, 'ndcg_cut_10': 0.4569, 'judged_cut_10': 0.5860, 'ndcg_cut_20': 0.4417, + 'judged_cut_20': 0.5930, 'map': 0.1904, 'recall_1000': 0.4525, 'judged_cut_1000': 0.2264}, 'anserini.covid-r5.abstract.qdel.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.4912, 'judged_cut_10': 0.6240, 'ndcg_cut_20': 0.4596, - 'judged_cut_20': 0.6040, 'map': 0.2042, 'recall_1000': 0.4714, 'judged_cut_1000': 0.2351}, + {'topics': 50, 'ndcg_cut_10': 0.4903, 'judged_cut_10': 0.6180, 'ndcg_cut_20': 0.4597, + 'judged_cut_20': 0.6030, 'map': 0.2041, 'recall_1000': 0.4714, 'judged_cut_1000': 0.2351}, 'anserini.covid-r5.full-text.qq.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.3240, 'judged_cut_10': 0.5660, 'ndcg_cut_20': 0.3055, 'judged_cut_20': 0.5250, 'map': 0.1324, 'recall_1000': 0.3758, 'judged_cut_1000': 0.2171}, @@ -115,30 +115,30 @@ def main(): {'topics': 50, 'ndcg_cut_10': 0.4634, 'judged_cut_10': 0.6460, 'ndcg_cut_20': 0.4387, 'judged_cut_20': 0.6280, 'map': 0.1793, 'recall_1000': 0.4368, 'judged_cut_1000': 0.2425}, 'anserini.covid-r5.paragraph.qq.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.4077, 'judged_cut_10': 0.6160, 'ndcg_cut_20': 0.3907, - 'judged_cut_20': 0.5920, 'map': 0.1981, 'recall_1000': 0.4877, 'judged_cut_1000': 0.2661}, + {'topics': 50, 'ndcg_cut_10': 0.4061, 'judged_cut_10': 0.6160, 'ndcg_cut_20': 0.3900, + 'judged_cut_20': 0.5910, 'map': 0.1980, 'recall_1000': 0.4877, 'judged_cut_1000': 0.2661}, 'anserini.covid-r5.paragraph.qdel.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.4918, 'judged_cut_10': 0.6440, 'ndcg_cut_20': 0.4569, - 'judged_cut_20': 0.6250, 'map': 0.2163, 'recall_1000': 0.5101, 'judged_cut_1000': 0.2710}, + {'topics': 50, 'ndcg_cut_10': 0.4905, 'judged_cut_10': 0.6440, 'ndcg_cut_20': 0.4569, + 'judged_cut_20': 0.6250, 'map': 0.2163, 'recall_1000': 0.5099, 'judged_cut_1000': 0.2710}, 'anserini.covid-r5.fusion1.txt': - {'topics': 50, 'ndcg_cut_10': 0.4696, 'judged_cut_10': 0.6520, 'ndcg_cut_20': 0.4539, - 'judged_cut_20': 0.6490, 'map': 0.2044, 'recall_1000': 0.5027, 'judged_cut_1000': 0.2751}, + {'topics': 50, 'ndcg_cut_10': 0.4696, 'judged_cut_10': 0.6540, 'ndcg_cut_20': 0.4525, + 'judged_cut_20': 0.6500, 'map': 0.2042, 'recall_1000': 0.5027, 'judged_cut_1000': 0.2750}, 'anserini.covid-r5.fusion2.txt': - {'topics': 50, 'ndcg_cut_10': 0.5077, 'judged_cut_10': 0.6800, 'ndcg_cut_20': 0.4956, - 'judged_cut_20': 0.6690, 'map': 0.2304, 'recall_1000': 0.5378, 'judged_cut_1000': 0.2851}, + {'topics': 50, 'ndcg_cut_10': 0.5057, 'judged_cut_10': 0.6800, 'ndcg_cut_20': 0.4954, + 'judged_cut_20': 0.6690, 'map': 0.2303, 'recall_1000': 0.5377, 'judged_cut_1000': 0.2851}, 'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.6177, 'judged_cut_10': 0.6620, 'ndcg_cut_20': 0.5738, - 'judged_cut_20': 0.6510, 'map': 0.2657, 'recall_1000': 0.5505, 'judged_cut_1000': 0.2562}, + {'topics': 50, 'ndcg_cut_10': 0.6207, 'judged_cut_10': 0.6560, 'ndcg_cut_20': 0.5782, + 'judged_cut_20': 0.6470, 'map': 0.2656, 'recall_1000': 0.5509, 'judged_cut_1000': 0.2563}, } evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.covid-r5.abstract.qq.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.6925, 'judged_cut_10': 0.9740, 'ndcg_cut_20': 0.6586, - 'judged_cut_20': 0.9700, 'map': 0.3010, 'recall_1000': 0.4636, 'judged_cut_1000': 0.4159}, + {'topics': 50, 'ndcg_cut_10': 0.6899, 'judged_cut_10': 0.9760, 'ndcg_cut_20': 0.6557, + 'judged_cut_20': 0.9700, 'map': 0.3009, 'recall_1000': 0.4636, 'judged_cut_1000': 0.4159}, 'anserini.covid-r5.abstract.qdel.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.7301, 'judged_cut_10': 0.9980, 'ndcg_cut_20': 0.6979, - 'judged_cut_20': 0.9900, 'map': 0.3230, 'recall_1000': 0.4839, 'judged_cut_1000': 0.4286}, + {'topics': 50, 'ndcg_cut_10': 0.7260, 'judged_cut_10': 0.9960, 'ndcg_cut_20': 0.6975, + 'judged_cut_20': 0.9900, 'map': 0.3229, 'recall_1000': 0.4838, 'judged_cut_1000': 0.4286}, 'anserini.covid-r5.full-text.qq.bm25.txt': {'topics': 50, 'ndcg_cut_10': 0.4709, 'judged_cut_10': 0.8920, 'ndcg_cut_20': 0.4382, 'judged_cut_20': 0.8370, 'map': 0.1777, 'recall_1000': 0.3427, 'judged_cut_1000': 0.3397}, @@ -146,39 +146,39 @@ def main(): {'topics': 50, 'ndcg_cut_10': 0.6286, 'judged_cut_10': 0.9840, 'ndcg_cut_20': 0.5973, 'judged_cut_20': 0.9630, 'map': 0.2391, 'recall_1000': 0.4087, 'judged_cut_1000': 0.3875}, 'anserini.covid-r5.paragraph.qq.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.5832, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5659, + {'topics': 50, 'ndcg_cut_10': 0.5832, 'judged_cut_10': 0.9600, 'ndcg_cut_20': 0.5656, 'judged_cut_20': 0.9390, 'map': 0.2808, 'recall_1000': 0.4695, 'judged_cut_1000': 0.4412}, 'anserini.covid-r5.paragraph.qdel.bm25.txt': - {'topics': 50, 'ndcg_cut_10': 0.6764, 'judged_cut_10': 0.9840, 'ndcg_cut_20': 0.6368, + {'topics': 50, 'ndcg_cut_10': 0.6764, 'judged_cut_10': 0.9840, 'ndcg_cut_20': 0.6367, 'judged_cut_20': 0.9740, 'map': 0.3089, 'recall_1000': 0.4949, 'judged_cut_1000': 0.4542}, 'anserini.covid-r5.fusion1.txt': - {'topics': 50, 'ndcg_cut_10': 0.6469, 'judged_cut_10': 0.9860, 'ndcg_cut_20': 0.6184, - 'judged_cut_20': 0.9800, 'map': 0.2952, 'recall_1000': 0.4967, 'judged_cut_1000': 0.4675}, + {'topics': 50, 'ndcg_cut_10': 0.6488, 'judged_cut_10': 0.9860, 'ndcg_cut_20': 0.6190, + 'judged_cut_20': 0.9800, 'map': 0.2952, 'recall_1000': 0.4966, 'judged_cut_1000': 0.4674}, 'anserini.covid-r5.fusion2.txt': - {'topics': 50, 'ndcg_cut_10': 0.6972, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6785, + {'topics': 50, 'ndcg_cut_10': 0.6946, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6787, 'judged_cut_20': 1.000, 'map': 0.3329, 'recall_1000': 0.5313, 'judged_cut_1000': 0.4869}, 'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.8395, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7955, - 'judged_cut_20': 0.9990, 'map': 0.3911, 'recall_1000': 0.5536, 'judged_cut_1000': 0.4607}, + {'topics': 50, 'ndcg_cut_10': 0.8406, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7965, + 'judged_cut_20': 1.000, 'map': 0.3912, 'recall_1000': 0.5540, 'judged_cut_1000': 0.4610}, } evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag) expected_metrics = { 'anserini.final-r5.fusion1.txt': - {'topics': 50, 'ndcg_cut_10': 0.5668, 'judged_cut_10': 0.9140, 'ndcg_cut_20': 0.5244, - 'judged_cut_20': 0.8490, 'map': 0.2302, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2148}, + {'topics': 50, 'ndcg_cut_10': 0.5665, 'judged_cut_10': 0.9140, 'ndcg_cut_20': 0.5247, + 'judged_cut_20': 0.8490, 'map': 0.2302, 'recall_1000': 0.5616, 'judged_cut_1000': 0.2149}, 'anserini.final-r5.fusion1.post-processed.txt': {'topics': 50, 'ndcg_cut_10': 0.5726, 'judged_cut_10': 0.9240, 'ndcg_cut_20': 0.5313, 'judged_cut_20': 0.8570, 'map': 0.2314, 'recall_1000': 0.5615, 'judged_cut_1000': 0.2151}, 'anserini.final-r5.fusion2.txt': - {'topics': 50, 'ndcg_cut_10': 0.6366, 'judged_cut_10': 0.9640, 'ndcg_cut_20': 0.5941, - 'judged_cut_20': 0.9080, 'map': 0.2716, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2263}, + {'topics': 50, 'ndcg_cut_10': 0.6387, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.5955, + 'judged_cut_20': 0.9090, 'map': 0.2719, 'recall_1000': 0.6013, 'judged_cut_1000': 0.2263}, 'anserini.final-r5.fusion2.post-processed.txt': {'topics': 50, 'ndcg_cut_10': 0.6474, 'judged_cut_10': 0.9780, 'ndcg_cut_20': 0.6007, 'judged_cut_20': 0.9150, 'map': 0.2734, 'recall_1000': 0.6012, 'judged_cut_1000': 0.2267}, 'anserini.final-r5.rf.txt': - {'topics': 50, 'ndcg_cut_10': 0.7777, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.7193, - 'judged_cut_20': 0.9270, 'map': 0.3235, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2197}, + {'topics': 50, 'ndcg_cut_10': 0.7789, 'judged_cut_10': 0.9700, 'ndcg_cut_20': 0.7188, + 'judged_cut_20': 0.9270, 'map': 0.3234, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2197}, 'anserini.final-r5.rf.post-processed.txt': {'topics': 50, 'ndcg_cut_10': 0.7944, 'judged_cut_10': 0.9860, 'ndcg_cut_20': 0.7346, 'judged_cut_20': 0.9470, 'map': 0.3280, 'recall_1000': 0.6378, 'judged_cut_1000': 0.2201}, diff --git a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py index 97e97a9628..de88990932 100644 --- a/src/main/python/trec-covid/generate_round5_doc2query_baselines.py +++ b/src/main/python/trec-covid/generate_round5_doc2query_baselines.py @@ -87,8 +87,8 @@ def perform_runs(): - base_topics = 'src/main/resources/topics-and-qrels/topics.covid-round5.xml' - udel_topics = 'src/main/resources/topics-and-qrels/topics.covid-round5-udel.xml' + base_topics = 'tools/topics-and-qrels/topics.covid-round5.xml' + udel_topics = 'tools/topics-and-qrels/topics.covid-round5-udel.xml' print('') print('## Running on abstract index...') @@ -109,7 +109,7 @@ def perform_runs(): os.system(f'target/appassembler/bin/SearchCollection -index {abstract_index} ' + f'-topicreader Covid -topics {udel_topics} -topicfield query -removedups ' + f'-bm25 -rm3 -rm3.fbTerms 100 -hits 10000 ' + - f'-rf.qrels src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt ' + + f'-rf.qrels tools/topics-and-qrels/qrels.covid-round4-cumulative.txt ' + f'-output runs/{abstract_prefix}.qdel.bm25+rm3Rf.txt -runtag {abstract_prefix}.qdel.bm25+rm3Rf.txt') print('') @@ -222,9 +222,9 @@ def main(): if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])): print('Required indexes do not exist. Please download first.') - round4_cumulative_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round4-cumulative.txt' - complete_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-complete.txt' - round5_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round5.txt' + round4_cumulative_qrels = 'tools/topics-and-qrels/qrels.covid-round4-cumulative.txt' + complete_qrels = 'tools/topics-and-qrels/qrels.covid-complete.txt' + round5_qrels = 'tools/topics-and-qrels/qrels.covid-round5.txt' # MD5 checksums don't match anymore, see https://github.com/castorini/anserini/issues/1669 check_md5_flag = False diff --git a/src/main/python/trec-covid/index_cord19.py b/src/main/python/trec-covid/index_cord19.py index 652695e3dd..869052d369 100644 --- a/src/main/python/trec-covid/index_cord19.py +++ b/src/main/python/trec-covid/index_cord19.py @@ -113,7 +113,7 @@ def build_indexes(date): def evaluate_run(run): - qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round3-cumulative.txt' + qrels = 'tools/topics-and-qrels/qrels.covid-round3-cumulative.txt' metrics = {} output = subprocess.check_output( f'tools/eval/trec_eval.9.0.4/trec_eval -c -m ndcg_cut.10 {qrels} runs/{run}', shell=True) @@ -131,8 +131,8 @@ def evaluate_run(run): def verify_indexes(date): - topics = 'src/main/resources/topics-and-qrels/topics.covid-round3.xml' - whitelist = 'src/main/resources/topics-and-qrels/docids.covid.round3.txt' + topics = 'tools/topics-and-qrels/topics.covid-round3.xml' + whitelist = 'tools/topics-and-qrels/docids.covid.round3.txt' print('Verifying abstract index...') abstract_index = f'indexes/lucene-index-cord19-abstract-{date} '