Skip to content

Commit

Permalink
aggregate_ reps
Browse files Browse the repository at this point in the history
  • Loading branch information
Thong Nguyen committed Apr 26, 2023
1 parent 815d74c commit 9248cc9
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions scripts/aggregate_rep_msmarco_doc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Sweeps over the number of passages used per document (num_psgs). For each
# setting it:
#   1. runs lsr/long_documents/aggregate_long_documents.py on every file in
#      ${input_dir}/doc_vectors (one background job per file),
#   2. builds an Anserini impact index from ${input_dir}/doc_vectors_${aggr},
#   3. searches it with ${input_dir}/query.tsv,
#   4. appends MRR@10 / NDCG@10 / R@1000 to ${input_dir}/results_${aggr}.txt.
#
# Must be run from the repository root (all paths are relative).
# NOTE(review): the aggregation script is presumably what populates
# doc_vectors_${aggr}; its output path is not passed here -- confirm.

# Stop on the first failing step so a broken index or run is never evaluated.
set -euo pipefail

input_dir=data/msmarco_doc
aggr="mean"
anserini_bin=../anserini-lsr/target/appassembler/bin
vectors_dir="${input_dir}/doc_vectors_${aggr}"
index_dir="${input_dir}/index_${aggr}"
output_file="${input_dir}/results_${aggr}.txt"
num_psgs=(1 2 3 4 5 6 7 10)

echo "Exp results" > "$output_file"

for n in "${num_psgs[@]}"; do
  # Start every setting from a clean slate. -f keeps the first iteration
  # quiet when the directories do not exist yet; :? guards against an empty
  # path ever reaching rm.
  rm -rf -- "${index_dir:?}" "${vectors_dir:?}"
  mkdir -p -- "$vectors_dir"

  # Fan out: one aggregation job per input file.
  pids=()
  for f in "$input_dir"/doc_vectors/*; do
    # An unmatched glob stays literal; skip it instead of passing it on.
    [[ -e "$f" ]] || continue
    python lsr/long_documents/aggregate_long_documents.py "$f" "$aggr" "$n" &
    pids+=("$!")
  done

  if (( ${#pids[@]} == 0 )); then
    echo "error: no input files found in ${input_dir}/doc_vectors" >&2
    exit 1
  fi

  # Barrier: wait on each job individually so every exit status is seen
  # ("wait pid1 pid2 ..." only reports the status of the last one).
  failed=0
  for pid in "${pids[@]}"; do
    wait "$pid" || failed=1
  done
  if (( failed )); then
    echo "error: aggregation failed for n=${n} (aggr=${aggr})" >&2
    exit 1
  fi

  "${anserini_bin}/IndexCollection" \
    -collection JsonSparseVectorCollection \
    -input "$vectors_dir" \
    -index "$index_dir" \
    -generator SparseVectorDocumentGenerator \
    -threads 200 -impact -pretokenized

  run_file="${input_dir}/run_${aggr}_${n}.trec"
  "${anserini_bin}/SearchCollection" \
    -index "$index_dir" \
    -topics "${input_dir}/query.tsv" \
    -topicreader TsvString \
    -output "$run_file" \
    -impact -pretokenized -hits 1000 -parallelism 200

  # Label the metrics block with the passage count it belongs to.
  echo "$n" >> "$output_file"

  ir_measures "${input_dir}/msmarco-docdev-qrels.tsv" "$run_file" \
    MRR@10 NDCG@10 R@1000 >> "$output_file"
done

0 comments on commit 9248cc9

Please sign in to comment.