Skip to content

Commit

Permalink
Fix TREC-COVID regressions (take 2) (#2201)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lintool authored Sep 20, 2023
1 parent 88935fc commit 444eacc
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -251,21 +251,21 @@ def main():
{'topics': 45, 'ndcg_cut_10': 0.6618, 'judged_cut_10': 0.8622, 'ndcg_cut_20': 0.6331,
'judged_cut_20': 0.8444, 'map': 0.2974, 'recall_1000': 0.5847, 'judged_cut_1000': 0.3344},
'expanded.anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt':
{'topics': 45, 'ndcg_cut_10': 0.7447, 'judged_cut_10': 0.8933, 'ndcg_cut_20': 0.7067,
'judged_cut_20': 0.8589, 'map': 0.3182, 'recall_1000': 0.5812, 'judged_cut_1000': 0.2904},
{'topics': 45, 'ndcg_cut_10': 0.7436, 'judged_cut_10': 0.8911, 'ndcg_cut_20': 0.7067,
'judged_cut_20': 0.8600, 'map': 0.3182, 'recall_1000': 0.5811, 'judged_cut_1000': 0.2904},
}
evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

expected_metrics = {
'expanded.anserini.final-r4.fusion1.txt':
{'topics': 45, 'ndcg_cut_10': 0.5395, 'judged_cut_10': 0.7222, 'ndcg_cut_20': 0.5115,
'judged_cut_20': 0.6944, 'map': 0.2498, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424},
'judged_cut_20': 0.6944, 'map': 0.2497, 'recall_1000': 0.6717, 'judged_cut_1000': 0.1424},
'expanded.anserini.final-r4.fusion2.txt':
{'topics': 45, 'ndcg_cut_10': 0.5630, 'judged_cut_10': 0.7444, 'ndcg_cut_20': 0.5175,
'judged_cut_20': 0.6911, 'map': 0.2550, 'recall_1000': 0.6800, 'judged_cut_1000': 0.1434},
'expanded.anserini.final-r4.rf.txt':
{'topics': 45, 'ndcg_cut_10': 0.6062, 'judged_cut_10': 0.7378, 'ndcg_cut_20': 0.5606,
'judged_cut_20': 0.6833, 'map': 0.2658, 'recall_1000': 0.6759, 'judged_cut_1000': 0.1284},
{'topics': 45, 'ndcg_cut_10': 0.6086, 'judged_cut_10': 0.7400, 'ndcg_cut_20': 0.5619,
'judged_cut_20': 0.6844, 'map': 0.2659, 'recall_1000': 0.6765, 'judged_cut_1000': 0.1284},
}
evaluate_runs(round4_qrels, final_runs, expected=expected_metrics, check_md5=check_md5_flag)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def main():
'judged_cut_20': 0.5120, 'map': 0.1728, 'recall_1000': 0.4462, 'judged_cut_1000': 0.2059},
'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
{'topics': 50, 'ndcg_cut_10': 0.4548, 'judged_cut_10': 0.5000, 'ndcg_cut_20': 0.4260,
'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4527, 'judged_cut_1000': 0.2051},
'judged_cut_20': 0.4880, 'map': 0.1742, 'recall_1000': 0.4528, 'judged_cut_1000': 0.2051},
'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
{'topics': 50, 'ndcg_cut_10': 0.4450, 'judged_cut_10': 0.6020, 'ndcg_cut_20': 0.4208,
'judged_cut_20': 0.5820, 'map': 0.1801, 'recall_1000': 0.4473, 'judged_cut_1000': 0.2393},
Expand All @@ -260,8 +260,8 @@ def main():
{'topics': 50, 'ndcg_cut_10': 0.4846, 'judged_cut_10': 0.5740, 'ndcg_cut_20': 0.4565,
'judged_cut_20': 0.5400, 'map': 0.2045, 'recall_1000': 0.5218, 'judged_cut_1000': 0.2578},
'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
{'topics': 50, 'ndcg_cut_10': 0.6095, 'judged_cut_10': 0.6320, 'ndcg_cut_20': 0.5693,
'judged_cut_20': 0.5990, 'map': 0.2344, 'recall_1000': 0.5280, 'judged_cut_1000': 0.2257},
{'topics': 50, 'ndcg_cut_10': 0.6121, 'judged_cut_10': 0.6340, 'ndcg_cut_20': 0.5705,
'judged_cut_20': 0.6000, 'map': 0.2345, 'recall_1000': 0.5279, 'judged_cut_1000': 0.2255},
}
evaluate_runs(round4_cumulative_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

Expand All @@ -271,7 +271,7 @@ def main():
'judged_cut_20': 0.9600, 'map': 0.2718, 'recall_1000': 0.4550, 'judged_cut_1000': 0.3845},
'expanded.anserini.covid-r5.abstract.qdel.bm25.txt':
{'topics': 50, 'ndcg_cut_10': 0.6939, 'judged_cut_10': 0.9920, 'ndcg_cut_20': 0.6524,
'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4595, 'judged_cut_1000': 0.3825},
'judged_cut_20': 0.9610, 'map': 0.2752, 'recall_1000': 0.4596, 'judged_cut_1000': 0.3825},
'expanded.anserini.covid-r5.full-text.qq.bm25.txt':
{'topics': 50, 'ndcg_cut_10': 0.6300, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5843,
'judged_cut_20': 0.9260, 'map': 0.2475, 'recall_1000': 0.4201, 'judged_cut_1000': 0.3921},
Expand All @@ -291,8 +291,8 @@ def main():
{'topics': 50, 'ndcg_cut_10': 0.7131, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.6755,
'judged_cut_20': 0.9910, 'map': 0.3036, 'recall_1000': 0.5166, 'judged_cut_1000': 0.4518},
'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt':
{'topics': 50, 'ndcg_cut_10': 0.8160, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7787,
'judged_cut_20': 0.9960, 'map': 0.3421, 'recall_1000': 0.5249, 'judged_cut_1000': 0.4107},
{'topics': 50, 'ndcg_cut_10': 0.8175, 'judged_cut_10': 1.0000, 'ndcg_cut_20': 0.7778,
'judged_cut_20': 0.9950, 'map': 0.3421, 'recall_1000': 0.5250, 'judged_cut_1000': 0.4106},
}
evaluate_runs(complete_qrels, cumulative_runs, expected=expected_metrics, check_md5=check_md5_flag)

Expand All @@ -310,8 +310,8 @@ def main():
{'topics': 50, 'ndcg_cut_10': 0.5825, 'judged_cut_10': 0.9680, 'ndcg_cut_20': 0.5436,
'judged_cut_20': 0.8700, 'map': 0.2319, 'recall_1000': 0.5861, 'judged_cut_1000': 0.2138},
'expanded.anserini.final-r5.rf.txt':
{'topics': 50, 'ndcg_cut_10': 0.6628, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6040,
'judged_cut_20': 0.8370, 'map': 0.2410, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1995},
{'topics': 50, 'ndcg_cut_10': 0.6620, 'judged_cut_10': 0.9460, 'ndcg_cut_20': 0.6053,
'judged_cut_20': 0.8380, 'map': 0.2409, 'recall_1000': 0.6034, 'judged_cut_1000': 0.1993},
'expanded.anserini.final-r5.rf.post-processed.txt':
{'topics': 50, 'ndcg_cut_10': 0.6757, 'judged_cut_10': 0.9620, 'ndcg_cut_20': 0.6124,
'judged_cut_20': 0.8470, 'map': 0.2433, 'recall_1000': 0.6039, 'judged_cut_1000': 0.1998},
Expand Down

0 comments on commit 444eacc

Please sign in to comment.