Skip to content

Commit

Permalink
Fix TREC-COVID regressions (#2196)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lintool authored Sep 18, 2023
1 parent bde7fb4 commit 88935fc
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 147 deletions.
14 changes: 7 additions & 7 deletions src/main/python/trec-covid/covid_baseline_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import math
import os
import re
import subprocess
Expand All @@ -22,11 +22,11 @@


def perform_runs(round_number, indexes):
base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round{round_number}.xml'
udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round{round_number}-udel.xml'
base_topics = f'tools/topics-and-qrels/topics.covid-round{round_number}.xml'
udel_topics = f'tools/topics-and-qrels/topics.covid-round{round_number}-udel.xml'

# Use cumulative qrels from previous round for relevance feedback runs
cumulative_qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt'
cumulative_qrels = f'tools/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt'

print('')
print('## Running on abstract index...')
Expand Down Expand Up @@ -119,7 +119,7 @@ def perform_fusion(round_number, run_checksums, check_md5=True):

def prepare_final_submissions(round_number, run_checksums, check_md5=True):
# Remove the cumulative qrels from the previous round.
qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt'
qrels = f'tools/topics-and-qrels/qrels.covid-round{round_number - 1}-cumulative.txt'

print('')
print('## Preparing final submission files by removing qrels...')
Expand Down Expand Up @@ -235,8 +235,8 @@ def evaluate_runs(qrels, runs, expected={}, check_md5=True):
for key in ['topics', 'ndcg_cut_10', 'judged_cut_10', 'ndcg_cut_20',
'judged_cut_20', 'map', 'recall_1000', 'judged_cut_1000']:
if key in expected[run]:
assert metrics[key] == expected[run][key],\
f'\'{key}\' doesn\'t match, expected {expected[run][key]} got {metrics[key]}!'
assert math.isclose(metrics[key], expected[run][key], rel_tol=1e-4), \
f'\'{key}\' doesn\'t match, expected {expected[run][key]:.4f} got {metrics[key]:.4f}!'

if check_md5:
assert metrics['md5'] == runs[run], f'Error in producing {run}!'
Expand Down
4 changes: 2 additions & 2 deletions src/main/python/trec-covid/generate_query_udel.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,14 @@
original_query_file_name = f'topics.covid-round{args.round}.xml'
original_query_file = os.path.join(
args.anserini_root,
'src/main/resources/topics-and-qrels',
'tools/topics-and-qrels',
original_query_file_name
)

output_query_file_name = f'topics.covid-round{args.round}-udel.xml'
output_query_file = os.path.join(
args.anserini_root,
'src/main/resources/topics-and-qrels',
'tools/topics-and-qrels',
output_query_file_name
)

Expand Down
14 changes: 7 additions & 7 deletions src/main/python/trec-covid/generate_round1_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@


def perform_runs():
base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round1.xml'
udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round1-udel.xml'
base_topics = f'tools/topics-and-qrels/topics.covid-round1.xml'
udel_topics = f'tools/topics-and-qrels/topics.covid-round1-udel.xml'

print('')
print('## Running on abstract index...')
Expand Down Expand Up @@ -178,7 +178,7 @@ def main():
if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])):
print('Required indexes do not exist. Please download first.')

round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
round1_qrels = 'tools/topics-and-qrels/qrels.covid-round1.txt'

# Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669
# Thus, no point in checking MD5.
Expand All @@ -189,7 +189,7 @@ def main():

expected_metrics = {
'anserini.covid-r1.abstract.query.bm25.txt':
{'ndcg_cut_10': 0.4100, 'judged_cut_10': 0.8267, 'recall_1000': 0.5279},
{'ndcg_cut_10': 0.4100, 'judged_cut_10': 0.8267, 'recall_1000': 0.5285},
'anserini.covid-r1.abstract.question.bm25.txt':
{'ndcg_cut_10': 0.5179, 'judged_cut_10': 0.9833, 'recall_1000': 0.6313},
'anserini.covid-r1.abstract.query+question.bm25.txt':
Expand All @@ -213,17 +213,17 @@ def main():
'anserini.covid-r1.full-text.query-covid19.bm25.txt':
{'ndcg_cut_10': 0.2434, 'judged_cut_10': 0.5233, 'recall_1000': 0.5692},
'anserini.covid-r1.paragraph.query.bm25.txt':
{'ndcg_cut_10': 0.4302, 'judged_cut_10': 0.8400, 'recall_1000': 0.4327},
{'ndcg_cut_10': 0.4303, 'judged_cut_10': 0.8400, 'recall_1000': 0.4324},
'anserini.covid-r1.paragraph.question.bm25.txt':
{'ndcg_cut_10': 0.4410, 'judged_cut_10': 0.9167, 'recall_1000': 0.5111},
{'ndcg_cut_10': 0.4410, 'judged_cut_10': 0.9167, 'recall_1000': 0.5108},
'anserini.covid-r1.paragraph.query+question.bm25.txt':
{'ndcg_cut_10': 0.5450, 'judged_cut_10': 0.9733, 'recall_1000': 0.5743},
'anserini.covid-r1.paragraph.query+question+narrative.bm25.txt':
{'ndcg_cut_10': 0.4899, 'judged_cut_10': 0.8967, 'recall_1000': 0.5918},
'anserini.covid-r1.paragraph.query-udel.bm25.txt':
{'ndcg_cut_10': 0.5544, 'judged_cut_10': 0.9200, 'recall_1000': 0.5640},
'anserini.covid-r1.paragraph.query-covid19.bm25.txt':
{'ndcg_cut_10': 0.3180, 'judged_cut_10': 0.5333, 'recall_1000': 0.3552},
{'ndcg_cut_10': 0.3180, 'judged_cut_10': 0.5300, 'recall_1000': 0.3552},
'anserini.covid-r1.fusion1.txt':
{'ndcg_cut_10': 0.5716, 'judged_cut_10': 0.9867, 'recall_1000': 0.8122},
'anserini.covid-r1.fusion2.txt':
Expand Down
22 changes: 11 additions & 11 deletions src/main/python/trec-covid/generate_round2_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@


def perform_runs():
base_topics = f'src/main/resources/topics-and-qrels/topics.covid-round2.xml'
udel_topics = f'src/main/resources/topics-and-qrels/topics.covid-round2-udel.xml'
base_topics = f'tools/topics-and-qrels/topics.covid-round2.xml'
udel_topics = f'tools/topics-and-qrels/topics.covid-round2-udel.xml'

print('')
print('## Running on abstract index...')
Expand Down Expand Up @@ -135,7 +135,7 @@ def perform_fusion(run_checksums, check_md5=True):

def prepare_final_submissions(run_checksums, check_md5=True):
# Remove the cumulative qrels from the previous round.
qrels = f'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
qrels = f'tools/topics-and-qrels/qrels.covid-round1.txt'

print('')
print('## Preparing final submission files by removing qrels...')
Expand Down Expand Up @@ -170,8 +170,8 @@ def main():
if not (os.path.isdir(indexes[0]) and os.path.isdir(indexes[1]) and os.path.isdir(indexes[2])):
print('Required indexes do not exist. Please download first.')

round1_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round1.txt'
round2_qrels = 'src/main/resources/topics-and-qrels/qrels.covid-round2.txt'
round1_qrels = 'tools/topics-and-qrels/qrels.covid-round1.txt'
round2_qrels = 'tools/topics-and-qrels/qrels.covid-round2.txt'

# Note that this script was written after this issue was noted: https://github.com/castorini/anserini/issues/1669
# Thus, no point in checking MD5.
Expand All @@ -183,23 +183,23 @@ def main():

expected_metrics = {
'anserini.covid-r2.abstract.qq.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.3522, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3171,
{'topics': 35, 'ndcg_cut_10': 0.3521, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3170,
'judged_cut_20': 0.5100, 'map': 0.1752, 'recall_1000': 0.6601, 'judged_cut_1000': 0.1013},
'anserini.covid-r2.abstract.qdel.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.3781, 'judged_cut_10': 0.5371, 'ndcg_cut_20': 0.3462,
'judged_cut_20': 0.4829, 'map': 0.1804, 'recall_1000': 0.6485, 'judged_cut_1000': 0.0958},
'judged_cut_20': 0.4829, 'map': 0.1803, 'recall_1000': 0.6485, 'judged_cut_1000': 0.0958},
'anserini.covid-r2.full-text.qq.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.2070, 'judged_cut_10': 0.4286, 'ndcg_cut_20': 0.1931,
'judged_cut_20': 0.3929, 'map': 0.1159, 'recall_1000': 0.5953, 'judged_cut_1000': 0.0995},
'anserini.covid-r2.full-text.qdel.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.3123, 'judged_cut_10': 0.4229, 'ndcg_cut_20': 0.2738,
'judged_cut_20': 0.3929, 'map': 0.1473, 'recall_1000': 0.6517, 'judged_cut_1000': 0.1022},
'anserini.covid-r2.paragraph.qq.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.2772, 'judged_cut_10': 0.4400, 'ndcg_cut_20': 0.2579,
{'topics': 35, 'ndcg_cut_10': 0.2770, 'judged_cut_10': 0.4400, 'ndcg_cut_20': 0.2578,
'judged_cut_20': 0.4529, 'map': 0.1607, 'recall_1000': 0.7248, 'judged_cut_1000': 0.1220},
'anserini.covid-r2.paragraph.qdel.bm25.txt':
{'topics': 35, 'ndcg_cut_10': 0.3353, 'judged_cut_10': 0.4343, 'ndcg_cut_20': 0.2956,
'judged_cut_20': 0.4329, 'map': 0.1772, 'recall_1000': 0.7196, 'judged_cut_1000': 0.1136},
{'topics': 35, 'ndcg_cut_10': 0.3350, 'judged_cut_10': 0.4343, 'ndcg_cut_20': 0.2954,
'judged_cut_20': 0.4329, 'map': 0.1772, 'recall_1000': 0.7196, 'judged_cut_1000': 0.1137},
'anserini.covid-r2.fusion1.txt':
{'topics': 35, 'ndcg_cut_10': 0.3297, 'judged_cut_10': 0.4657, 'ndcg_cut_20': 0.3060,
'judged_cut_20': 0.4643, 'map': 0.1914, 'recall_1000': 0.7561, 'judged_cut_1000': 0.1304},
Expand All @@ -216,7 +216,7 @@ def main():
{'topics': 35, 'ndcg_cut_10': 0.4827, 'judged_cut_10': 0.9543, 'ndcg_cut_20': 0.4512,
'judged_cut_20': 0.8614, 'map': 0.2431, 'recall_1000': 0.6475, 'judged_cut_1000': 0.1463},
'anserini.final-r2.fusion2.txt':
{'topics': 35, 'ndcg_cut_10': 0.5553, 'judged_cut_10': 0.9743, 'ndcg_cut_20': 0.5058,
{'topics': 35, 'ndcg_cut_10': 0.5553, 'judged_cut_10': 0.9714, 'ndcg_cut_20': 0.5058,
'judged_cut_20': 0.8957, 'map': 0.2739, 'recall_1000': 0.6832, 'judged_cut_1000': 0.1528},
}
evaluate_runs(round2_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)
Expand Down
Loading

0 comments on commit 88935fc

Please sign in to comment.