From 308138ede94622fd16416832329f7f44e59f6d1e Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Sat, 18 Jun 2022 20:45:34 -0400 Subject: [PATCH] Update docs to add notes about BEIR uniCOIL and HC4 regressions (#1915) --- README.md | 3 +- docs/regressions-log.md | 13 +++++++ docs/regressions.md | 40 +++++++++++++++++++++ src/main/python/beir/gather_beir_results.py | 10 +++--- 4 files changed, 61 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5d991805a1..9db6aa9f94 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + Anserini ======== [![build](https://github.com/castorini/anserini/actions/workflows/maven.yml/badge.svg)](https://github.com/castorini/anserini/actions) @@ -118,8 +119,8 @@ See individual pages for details! + F = "flat" baseline + MF = "multifield" baseline -+ SPLADE = SPLADE-distill CoCodenser-medium + UCx = uniCOIL (noexp) ++ SPLADE = SPLADE-distill CoCodenser-medium | Corpus | flat | flat-wp | multifield | UCx | SPLADE | |--------|:----:|:-------:|:----------:|:------:|:------:| diff --git a/docs/regressions-log.md b/docs/regressions-log.md index b1bc6cd944..10626a7066 100644 --- a/docs/regressions-log.md +++ b/docs/regressions-log.md @@ -3,6 +3,19 @@ The following change log details commits to regression tests that alter effectiveness and the addition of new regression tests. This documentation is useful for figuring why results may have changed over time. +### June 17, 2022 + ++ commit [`f59283`](https://github.com/castorini/anserini/commit/f59283297e79045a81d6ff84eebb116ce842736c) (06/17/2022) + +Added regressions for BEIR, uniCOIL (noexp). + +### June 16, 2022 + ++ commit [`d71a11`](https://github.com/castorini/anserini/commit/d71a118faa17a29d34431e1d5c1cfae2d567e64c) (06/16/2022) ++ commit [`d2fbe6`](https://github.com/castorini/anserini/commit/d2fbe67b0f17545c0dd2edc4a445b8df9dcf80fc) (06/15/2022) + +Added regressions for HC4 corpora. + ### May 26, 2022 + commit [`fc542b`](https://github.com/castorini/anserini/commit/fc542b5fa5dd67fe53e6110d8933b2d403f8e80e) (05/26/2022) diff --git a/docs/regressions.md b/docs/regressions.md index 86748b1635..4c1f048ea8 100644 --- a/docs/regressions.md +++ b/docs/regressions.md @@ -186,6 +186,42 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre ``` +
+BEIR (v1.0.0): uniCOIL (noexp) + +```bash +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-trec-covid-unicoil-noexp >& logs/log.beir-v1.0.0-trec-covid-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-bioasq-unicoil-noexp >& logs/log.beir-v1.0.0-bioasq-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-nfcorpus-unicoil-noexp >& logs/log.beir-v1.0.0-nfcorpus-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-nq-unicoil-noexp >& logs/log.beir-v1.0.0-nq-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-hotpotqa-unicoil-noexp >& logs/log.beir-v1.0.0-hotpotqa-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-fiqa-unicoil-noexp >& logs/log.beir-v1.0.0-fiqa-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-signal1m-unicoil-noexp >& logs/log.beir-v1.0.0-signal1m-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-trec-news-unicoil-noexp >& logs/log.beir-v1.0.0-trec-news-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-robust04-unicoil-noexp >& logs/log.beir-v1.0.0-robust04-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-arguana-unicoil-noexp >& logs/log.beir-v1.0.0-arguana-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-webis-touche2020-unicoil-noexp >& logs/log.beir-v1.0.0-webis-touche2020-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-android-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-android-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-english-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-english-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-gaming-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-gaming-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-gis-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-gis-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-mathematica-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-mathematica-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-physics-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-physics-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-programmers-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-programmers-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-stats-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-stats-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-tex-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-tex-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-unix-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-unix-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-webmasters-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-webmasters-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-cqadupstack-wordpress-unicoil-noexp >& logs/log.beir-v1.0.0-cqadupstack-wordpress-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-quora-unicoil-noexp >& logs/log.beir-v1.0.0-quora-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-dbpedia-entity-unicoil-noexp >& logs/log.beir-v1.0.0-dbpedia-entity-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-scidocs-unicoil-noexp >& logs/log.beir-v1.0.0-scidocs-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-fever-unicoil-noexp >& logs/log.beir-v1.0.0-fever-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-climate-fever-unicoil-noexp >& logs/log.beir-v1.0.0-climate-fever-unicoil-noexp & +nohup python src/main/python/run_regression.py --index --verify --search --regression beir-v1.0.0-scifact-unicoil-noexp >& logs/log.beir-v1.0.0-scifact-unicoil-noexp & +``` +
+
BEIR (v1.0.0): "flat" baseline @@ -311,6 +347,10 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression fire12-hi >& logs/log.fire12-hi & nohup python src/main/python/run_regression.py --index --verify --search --regression fire12-en >& logs/log.fire12-en & +nohup python src/main/python/run_regression.py --index --verify --search --regression hc4-v1.0-fa >& logs/log.hc4-v1.0-fa & +nohup python src/main/python/run_regression.py --index --verify --search --regression hc4-v1.0-ru >& logs/log.hc4-v1.0-ru & +nohup python src/main/python/run_regression.py --index --verify --search --regression hc4-v1.0-zh >& logs/log.hc4-v1.0-zh & + nohup python src/main/python/run_regression.py --index --verify --search --regression disk12 >& logs/log.disk12 & nohup python src/main/python/run_regression.py --index --verify --search --regression disk45 >& logs/log.disk45 & nohup python src/main/python/run_regression.py --index --verify --search --regression robust05 >& logs/log.robust05 & diff --git a/src/main/python/beir/gather_beir_results.py b/src/main/python/beir/gather_beir_results.py index 6ee1765a40..94a88cc421 100644 --- a/src/main/python/beir/gather_beir_results.py +++ b/src/main/python/beir/gather_beir_results.py @@ -50,7 +50,7 @@ 'scifact' ] -models = ['flat', 'multifield', 'splade-distil-cocodenser-medium'] +models = ['flat', 'multifield', 'unicoil-noexp', 'splade-distil-cocodenser-medium'] metrics = ['nDCG@10', 'R@100', 'R@1000'] table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) @@ -86,15 +86,17 @@ final_scores[model][metric] = final_score for metric in metrics: - print(f'{metric:25}flat multi SPLADE') - print(' ' * 25 + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6) + print(f'{metric:25}flat multi UCx SPLADE') + print(' ' * 25 + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6) for key in beir_keys: print(f'{key:25}{table[key]["flat"][metric]:.4f} ' + f'{table[key]["multifield"][metric]:.4f} ' + + f'{table[key]["unicoil-noexp"][metric]:.4f} ' + f'{table[key]["splade-distil-cocodenser-medium"][metric]:.4f}') - print(' ' * 25 + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6) + print(' ' * 25 + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6 + ' ' + '-' * 6) print(' ' * 25 + f'{final_scores["flat"][metric]:0.4f} ' + f'{final_scores["multifield"][metric]:0.4f} ' + + f'{final_scores["unicoil-noexp"][metric]:0.4f} ' + f'{final_scores["splade-distil-cocodenser-medium"][metric]:0.4f}') print('\n')