-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcolbert_test.py
45 lines (29 loc) · 1.29 KB
/
colbert_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import metrics
import dataset
import colbert_helper
from bisect import bisect_left
from itertools import accumulate
'''
from colbert import Indexer, Searcher
from colbert.infra import Run, RunConfig, ColBERTConfig
#from colbert.data import Queries, Collection
'''
#==============================================================================================
def aggregate_ndcg(vector_pairs):
    """Sum per-query DCG/IDCG vectors and return their element-wise ratio.

    Args:
        vector_pairs: iterable of ``(dcg_vector, idcg_vector)`` pairs, one
            pair per query. Each vector is a sequence of numbers.

    Returns:
        A list whose i-th entry is ``sum(dcg[i]) / sum(idcg[i])`` over all
        queries. Its length is that of the shortest vector seen. An empty
        input yields ``[]``.
    """
    total_dcg = None
    total_idcg = None
    for dcg_vector, idcg_vector in vector_pairs:
        if total_dcg is None:
            # Size the accumulators from the first result instead of from the
            # number of queries: zip() stops at the shorter input, so a
            # query-count-sized accumulator silently drops ranks whenever
            # there are fewer queries than entries per vector.
            total_dcg = list(dcg_vector)
            total_idcg = list(idcg_vector)
        else:
            total_dcg = [acc + value for acc, value in zip(total_dcg, dcg_vector)]
            total_idcg = [acc + value for acc, value in zip(total_idcg, idcg_vector)]

    if total_dcg is None:
        return []

    # A zero ideal score at a position would otherwise raise
    # ZeroDivisionError; report 0.0 for that position instead.
    return [
        gain / ideal if ideal else 0.0
        for gain, ideal in zip(total_dcg, total_idcg)
    ]


def main(top_k=20):
    """Run every query through the searcher and print the aggregated nDCG list.

    Args:
        top_k: value passed as the last argument of ``colbert_helper.search``
            (presumably the number of results to retrieve -- confirm against
            the helper).
    """
    # dataset.collection_preprocessing()
    # NOTE(review): the call above was already disabled in the original;
    # presumably a one-time preprocessing step -- confirm before re-enabling.
    docs_dataset, queries_dataset = dataset.load_datasets(
        "proj/docs", "proj/cfquery_detailed"
    )
    searcher = colbert_helper.get_searcher(docs_dataset)

    def per_query_vectors():
        # Yields the (dcg_vector, idcg_vector) pair returned by metrics.dcg
        # for each query, so the aggregation can consume them lazily.
        for q in queries_dataset:
            answers = q["answers"]
            ranked = colbert_helper.search(
                searcher, docs_dataset, q["query"], top_k
            )
            yield metrics.dcg(ranked, answers["docs"], answers["scores"])

    # NOTE(review): the ratio is computed once, after all queries have been
    # accumulated; the original indentation was not visible, so confirm this
    # matches the intended placement.
    print(aggregate_ndcg(per_query_vectors()))


if __name__ == "__main__":
    main()