From 9248cc93321086a5a4e6e449a719993a684959f9 Mon Sep 17 00:00:00 2001 From: Thong Nguyen Date: Wed, 26 Apr 2023 19:49:23 +0200 Subject: [PATCH] aggregate_ reps --- scripts/aggregate_rep_msmarco_doc.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 scripts/aggregate_rep_msmarco_doc.sh diff --git a/scripts/aggregate_rep_msmarco_doc.sh b/scripts/aggregate_rep_msmarco_doc.sh new file mode 100644 index 0000000..22cc627 --- /dev/null +++ b/scripts/aggregate_rep_msmarco_doc.sh @@ -0,0 +1,27 @@ +#!/bin/bash +input_dir=data/msmarco_doc +aggr="mean" +pids="" +output_file=${input_dir}/results_${aggr}.txt +echo "Exp results" > $output_file +num_psgs=(1 2 3 4 5 6 7 10) +for n in ${num_psgs[@]}; do +rm -r ${input_dir}/index_${aggr} +rm -r ${input_dir}/doc_vectors_${aggr} +mkdir ${input_dir}/doc_vectors_${aggr} +for f in $input_dir/doc_vectors/*; do + python lsr/long_documents/aggregate_long_documents.py $f $aggr $n & + pids="$pids $!" +done +wait $pids + +../anserini-lsr/target/appassembler/bin/IndexCollection -collection JsonSparseVectorCollection -input ${input_dir}/doc_vectors_${aggr} -index ${input_dir}/index_${aggr} -generator SparseVectorDocumentGenerator -threads 200 -impact -pretokenized + +../anserini-lsr/target/appassembler/bin/SearchCollection -index ${input_dir}/index_${aggr} -topics ${input_dir}/query.tsv -topicreader TsvString -output ${input_dir}/run_${aggr}_${n}.trec -impact -pretokenized -hits 1000 -parallelism 200 + +echo $n >> $output_file + +ir_measures data/msmarco_doc/msmarco-docdev-qrels.tsv ${input_dir}/run_${aggr}_${n}.trec MRR@10 NDCG@10 R@1000 >> $output_file + +unset pids +done \ No newline at end of file