-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcompute_mphf_generic.hpp
58 lines (47 loc) · 1.57 KB
/
compute_mphf_generic.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <iostream>
#include <fstream>
#include <iterator>
#include <random>
#include "common.hpp"
#include "mphf.hpp"
#include "base_hash.hpp"
#include "perfutils.hpp"
namespace emphf {
template <typename HypergraphSorter32,
typename HypergraphSorter64,
typename BaseHasher>
int compute_mphf_main(int argc, char** argv)
{
if (argc < 2) {
std::cerr << "Expected: " << argv[0] << " <filename> [output_filename]" << std::endl;
std::terminate();
}
const char* filename = argv[1];
std::string output_filename;
if (argc >= 3) {
output_filename = argv[2];
}
logger() << "Processing " << filename << std::endl;
file_lines lines(filename);
size_t n = lines.size();
logger() << n << " strings to process." << std::endl;
stl_string_adaptor adaptor;
typedef mphf<BaseHasher>mphf_t;
mphf_t mphf;
size_t max_nodes = (size_t(std::ceil(double(n) * 1.23)) + 2) / 3 * 3;
if (max_nodes >= uint64_t(1) << 32) {
logger() << "Using 64-bit sorter" << std::endl;
HypergraphSorter64 sorter;
mphf_t(sorter, n, lines, adaptor).swap(mphf);
} else {
logger() << "Using 32-bit sorter" << std::endl;
HypergraphSorter32 sorter;
mphf_t(sorter, n, lines, adaptor).swap(mphf);
}
if (output_filename.size()) {
std::ofstream os(output_filename, std::ios::binary);
mphf.save(os);
}
return 0;
}
}