-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
49 lines (42 loc) · 4.17 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Local directory (relative path, trailing slash included) for data files.
DATA_DIR = 'data/'

# HDFS URIs (host localhost, port 9000); all three live under /output/.
HADOOP_OUTPUT_DIR, HADOOP_DATASET, HADOOP_QUERY = (
    'hdfs://localhost:9000/output/' + leaf
    for leaf in ('out_lsh', 'out_dataset', 'out_query')
)

# Query file inside DATA_DIR; evaluates to 'data/query.txt'.
QUERY_FILE = DATA_DIR + 'query.txt'

# Keyword string for queries.
# NOTE(review): how consumers use this marker is not visible here - confirm.
QUERY_KEYWORD = 'query'
# MinHash settings: number of permutations and the text encoding name.
# NOTE(review): presumably forwarded to the MinHash constructor - confirm
# against the caller.
MINHASH_PARAMS = dict(
    num_permutations=128,
    encoding='utf8',
)
# Maps each dataset archive file name to its URL in the
# "set-similarity-search" Google Cloud Storage bucket. Every URL is the
# bucket prefix followed by the file name, so only the names are listed.
#
# Currently disabled (same bucket, add the name to the tuple to re-enable):
#   BMS-POS_dup_dr.inp.gz, KOSARAK_dup_dr.inp.gz,
#   FLICKR-london2y_dup_dr.inp.gz, NETFLIX_dup_dr.inp.gz, orkut_ge10.inp.gz
DATASETS_REFERENCES = {
    archive: 'https://storage.googleapis.com/set-similarity-search/' + archive
    for archive in (
        'canada_us_uk_opendata.inp.gz',
        'canada_us_uk_opendata_queries_1k.inp.gz',
        'canada_us_uk_opendata_queries_10k.inp.gz',
        'canada_us_uk_opendata_queries_100k.inp.gz',
        'wdc_webtables_2015_english_relational.inp.gz',
        'wdc_webtables_2015_english_relational_queries_100.inp.gz',
        'wdc_webtables_2015_english_relational_queries_1k.inp.gz',
        'wdc_webtables_2015_english_relational_queries_10k.inp.gz',
    )
}
# Query-only archives (no full datasets, and no 100k opendata query set),
# mapped from file name to URL in the "set-similarity-search" bucket.
# NOTE(review): presumably the selection used for local runs - confirm.
DATASETS_REFERENCES_LOCAL = {
    archive: 'https://storage.googleapis.com/set-similarity-search/' + archive
    for archive in (
        'canada_us_uk_opendata_queries_1k.inp.gz',
        'canada_us_uk_opendata_queries_10k.inp.gz',
        'wdc_webtables_2015_english_relational_queries_100.inp.gz',
        'wdc_webtables_2015_english_relational_queries_1k.inp.gz',
        'wdc_webtables_2015_english_relational_queries_10k.inp.gz',
    )
}
# Full datasets plus all of their query sets, mapped from archive file name
# to URL in the "set-similarity-search" Google Cloud Storage bucket.
# NOTE(review): presumably the selection used for cluster runs - confirm.
DATASETS_REFERENCES_CLUSTER = {
    archive: 'https://storage.googleapis.com/set-similarity-search/' + archive
    for archive in (
        'canada_us_uk_opendata.inp.gz',
        'canada_us_uk_opendata_queries_1k.inp.gz',
        'canada_us_uk_opendata_queries_10k.inp.gz',
        'canada_us_uk_opendata_queries_100k.inp.gz',
        'wdc_webtables_2015_english_relational.inp.gz',
        'wdc_webtables_2015_english_relational_queries_100.inp.gz',
        'wdc_webtables_2015_english_relational_queries_1k.inp.gz',
        'wdc_webtables_2015_english_relational_queries_10k.inp.gz',
    )
}