Skip to content

Commit

Permalink
new command --sitelh: print per-site log-likelihoods (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
amkozlov committed Jun 26, 2020
1 parent 6234706 commit b2b3618
Show file tree
Hide file tree
Showing 15 changed files with 168 additions and 27 deletions.
2 changes: 1 addition & 1 deletion libs/pll-modules
11 changes: 9 additions & 2 deletions src/CommandLineParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ static struct option long_options[] =
{"ancestral", optional_argument, 0, 0 }, /* 53 */

{"workers", required_argument, 0, 0 }, /* 54 */
{"sitelh", no_argument, 0, 0 }, /* 55 */

{ 0, 0, 0, 0 }
};
Expand Down Expand Up @@ -103,7 +104,7 @@ void CommandLineParser::check_options(Options &opts)

if (opts.command == Command::evaluate || opts.command == Command::support ||
opts.command == Command::terrace || opts.command == Command::rfdist ||
opts.command == Command::ancestral)
opts.command == Command::sitelh || opts.command == Command::ancestral)
{
if (opts.tree_file.empty())
throw OptionException("Please provide a valid Newick file as an argument of --tree option.");
Expand Down Expand Up @@ -178,7 +179,7 @@ void CommandLineParser::compute_num_searches(Options &opts)
{
if (opts.command == Command::search || opts.command == Command::all ||
opts.command == Command::evaluate || opts.command == Command::start ||
opts.command == Command::ancestral)
opts.command == Command::ancestral || opts.command == Command::sitelh)
{
if (opts.start_trees.empty())
{
Expand Down Expand Up @@ -891,6 +892,11 @@ void CommandLineParser::parse_options(int argc, char** argv, Options &opts)
}
break;

case 55: /* per-site log-likelihoods */
opts.command = Command::sitelh;
num_commands++;
break;

default:
throw OptionException("Internal error in option parsing");
}
Expand Down Expand Up @@ -950,6 +956,7 @@ void CommandLineParser::print_help()
" --consense [ STRICT | MR | MR<n> | MRE ] build strict, majority-rule (MR) or extended MR (MRE) consensus tree (default: MR)\n"
" eg: --consense MR75 --tree bsrep.nw\n"
" --ancestral ancestral state reconstruction at all inner nodes\n"
" --sitelh print per-site log-likelihood values\n"
"\n"
"Command shortcuts (mutually exclusive):\n"
" --search1 Alias for: --search --tree rand{1}\n"
Expand Down
14 changes: 9 additions & 5 deletions src/MSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,20 @@ void MSA::append(const string& sequence, const string& header)
_dirty = true;
}

void MSA::compress_patterns(const pll_state_t * charmap)
void MSA::compress_patterns(const pll_state_t * charmap, bool store_backmap)
{
update_pll_msa();

assert(_pll_msa->count && _pll_msa->length);

const unsigned int * w = pll_compress_site_patterns(_pll_msa->sequence,
charmap,
_pll_msa->count,
&(_pll_msa->length));
if (store_backmap)
_site_pattern_map.resize(_pll_msa->length);

auto backmap_ptr = store_backmap ? _site_pattern_map.data() : nullptr;
unsigned int * w = pll_compress_site_patterns_msa(_pll_msa,
charmap,
backmap_ptr);

if (!w)
libpll_check_error("Pattern compression failed: ", true);

Expand Down
4 changes: 3 additions & 1 deletion src/MSA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class MSA
MSA& operator=(const MSA& other) = delete;

void append(const std::string& sequence, const std::string& header = "");
void compress_patterns(const pll_state_t * charmap);
void compress_patterns(const pll_state_t * charmap, bool store_backmap = false);

bool empty() const { return _sequences.empty(); }
size_t size() const { return _sequences.size(); }
Expand All @@ -48,6 +48,7 @@ class MSA
size_t num_patterns() const { return _weights.size(); }
const WeightVector& weights() const {return _weights; }
const NameIdMap& label_id_map() const { return _label_id_map; }
const WeightVector site_pattern_map() const { return _site_pattern_map; }
const pll_msa_t * pll_msa() const;

const container& labels() const { return _labels; };
Expand Down Expand Up @@ -97,6 +98,7 @@ class MSA
container _labels;
NameIdMap _label_id_map;
WeightVector _weights;
WeightVector _site_pattern_map;
ProbVectorList _probs;
RangeList _local_seq_ranges;
size_t _states;
Expand Down
11 changes: 10 additions & 1 deletion src/Options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ void Options::set_default_outfiles()
set_default_outfile(outfile_names.asr_tree, "ancestralTree");
set_default_outfile(outfile_names.asr_probs, "ancestralProbs");
set_default_outfile(outfile_names.asr_states, "ancestralStates");
set_default_outfile(outfile_names.site_loglh, "siteLH");
set_default_outfile(outfile_names.tmp_best_tree, "lastTree.TMP");
set_default_outfile(outfile_names.tmp_ml_trees, "mlTrees.TMP");
set_default_outfile(outfile_names.tmp_bs_trees, "bootstraps.TMP");
Expand Down Expand Up @@ -132,6 +133,8 @@ bool Options::result_files_exist() const
case Command::ancestral:
return sysutil_file_exists(asr_tree_file()) || sysutil_file_exists(asr_probs_file()) ||
sysutil_file_exists(asr_states_file());
case Command::sitelh:
return sysutil_file_exists(sitelh_file());
default:
return false;
}
Expand Down Expand Up @@ -177,7 +180,10 @@ void Options::remove_result_files() const
if (command == Command::consense)
sysutil_file_remove(cons_tree_file());

if (command == Command::consense)
if (command == Command::sitelh)
sysutil_file_remove(sitelh_file());

if (command == Command::ancestral)
{
sysutil_file_remove(asr_tree_file());
sysutil_file_remove(asr_probs_file());
Expand Down Expand Up @@ -286,6 +292,9 @@ std::ostream& operator<<(std::ostream& stream, const Options& opts)
case Command::ancestral:
stream << "Ancestral state reconstruction";
break;
case Command::sitelh:
stream << "Per-site likelihood computation";
break;
default:
break;
}
Expand Down
2 changes: 2 additions & 0 deletions src/Options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct OutputFileNames
std::string bootstrap_msa;
std::string rfdist;
std::string cons_tree;
std::string site_loglh;
std::string asr_tree;
std::string asr_probs;
std::string asr_states;
Expand Down Expand Up @@ -133,6 +134,7 @@ class Options
std::string bootstrap_partition_file() const;
const std::string rfdist_file() const { return outfile_names.rfdist; }
const std::string cons_tree_file() const { return outfile_names.cons_tree + consense_type_name(); }
const std::string sitelh_file() const { return outfile_names.site_loglh; }

const std::string asr_tree_file() const { return outfile_names.asr_tree; }
const std::string asr_probs_file() const { return outfile_names.asr_probs; }
Expand Down
4 changes: 2 additions & 2 deletions src/PartitionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ size_t PartitionInfo::mark_partition_sites(unsigned int part_num, std::vector<un
return sites_assigned;
}

void PartitionInfo::compress_patterns()
void PartitionInfo::compress_patterns(bool store_backmap)
{
_msa.compress_patterns(model().charmap());
_msa.compress_patterns(model().charmap(), store_backmap);
}

pllmod_msa_stats_t * PartitionInfo::compute_stats(unsigned long stats_mask) const
Expand Down
2 changes: 1 addition & 1 deletion src/PartitionInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class PartitionInfo

// operations
size_t mark_partition_sites(unsigned int part_num, std::vector<unsigned int>& site_part) const;
void compress_patterns();
void compress_patterns(bool store_backmap = false);
void set_model_empirical_params();

private:
Expand Down
29 changes: 26 additions & 3 deletions src/PartitionedMSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ uintVector PartitionedMSA::get_site_part_assignment() const

const uintVector& PartitionedMSA::site_part_map() const
{
if (_site_part_map.empty())
if (_site_part_map.empty() && part_count() > 1)
_site_part_map = get_site_part_assignment();

return _site_part_map;
Expand Down Expand Up @@ -103,6 +103,29 @@ size_t PartitionedMSA::full_msa_site(size_t index, size_t site) const
}
}

/*
* This function returns a pair (partition_id, local_site_id) for each site in
* the original, full, uncompressed MSA file.
*/
IdPairVector PartitionedMSA::full_to_parted_sitemap() const
{
auto total_sites = this->total_sites();
assert(site_part_map().empty() || site_part_map().size() == total_sites);

IdPairVector sitemap(total_sites);
IDVector part_site_idx(part_count(), 0);
for (size_t i = 0; i < total_sites; ++i)
{
auto pid = site_part_map().empty() ? 0 : site_part_map()[i] - 1;
auto sid = part_site_idx[pid]++;
auto spmap = part_info(pid).msa().site_pattern_map();
sitemap[i].first = pid;
sitemap[i].second = spmap.empty() ? sid : spmap[sid];
}

return sitemap;
}


void PartitionedMSA::full_msa(MSA&& msa)
{
Expand Down Expand Up @@ -148,11 +171,11 @@ void PartitionedMSA::split_msa()
}
}

void PartitionedMSA::compress_patterns()
void PartitionedMSA::compress_patterns(bool store_backmap)
{
for (PartitionInfo& pinfo: _part_list)
{
pinfo.compress_patterns();
pinfo.compress_patterns(store_backmap);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/PartitionedMSA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class PartitionedMSA

size_t full_msa_site(size_t index, size_t site) const;
const uintVector& site_part_map() const;
IdPairVector full_to_parted_sitemap() const;

size_t taxon_count() const { return _taxon_names.size(); };
size_t part_count() const { return _part_list.size(); };
Expand Down Expand Up @@ -57,7 +58,7 @@ class PartitionedMSA
}

void split_msa();
void compress_patterns();
void compress_patterns(bool store_backmap = false);
void set_model_empirical_params();

private:
Expand Down
8 changes: 8 additions & 0 deletions src/TreeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ double TreeInfo::loglh(bool incremental)
return pllmod_treeinfo_compute_loglh(_pll_treeinfo, incremental ? 1 : 0);
}

double TreeInfo::persite_loglh(std::vector<double*> part_site_lh, bool incremental)
{
assert(part_site_lh.size() == _pll_treeinfo->partition_count);
return pllmod_treeinfo_compute_loglh_persite(_pll_treeinfo, incremental ? 1 : 0,
part_site_lh.data());
}


void TreeInfo::model(size_t partition_id, const Model& model)
{
if (partition_id >= _pll_treeinfo->partition_count)
Expand Down
1 change: 1 addition & 0 deletions src/TreeInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class TreeInfo
void set_topology_constraint(const Tree& cons_tree);

double loglh(bool incremental = false);
double persite_loglh(std::vector<double*> part_site_lh, bool incremental = false);
double optimize_params(int params_to_optimize, double lh_epsilon);
double optimize_params_all(double lh_epsilon)
{ return optimize_params(PLLMOD_OPT_PARAM_ALL, lh_epsilon); } ;
Expand Down
Loading

0 comments on commit b2b3618

Please # to comment.