Skip to content

Commit 9a71f1d

Browse files
committed
do all-vs-all alignment within clusters
1 parent d595051 commit 9a71f1d

File tree

1 file changed

+32
-35
lines changed

1 file changed

+32
-35
lines changed

src/strucclustutils/structuremsa.cpp

+32-35
Original file line numberDiff line numberDiff line change
@@ -665,43 +665,39 @@ std::vector<AlnSimple> parseAndScoreExternalHits(
665665
#pragma omp for schedule(dynamic, 10)
666666
for (size_t i = 0; i < cluDbr->getSize(); ++i) {
667667
char *data = cluDbr->getData(i, thread_idx);
668-
unsigned int queryKey = cluDbr->getDbKey(i);
669-
670-
size_t queryId = seqDbrAA.getId(queryKey);
671-
seqQueryAa.mapSequence(queryId, queryKey, seqDbrAA.getData(queryId, thread_idx), seqDbrAA.getSeqLen(queryId));
672-
queryId = seqDbr3Di.getId(queryKey);
673-
seqQuerySs.mapSequence(queryId, queryKey, seqDbr3Di.getData(queryId, thread_idx), seqDbr3Di.getSeqLen(queryId));
674-
675-
structureSmithWaterman.ssw_init(
676-
&seqQueryAa,
677-
&seqQuerySs,
678-
tinySubMatAA,
679-
tinySubMat3Di,
680-
subMat_aa
681-
);
668+
669+
std::vector<unsigned int> memberDbKeys;
682670

683671
while (*data != '\0') {
684672
Util::parseKey(data, buffer);
685-
const unsigned int dbKey = (unsigned int) strtoul(buffer, NULL, 10);
686-
if (queryKey == dbKey) {
687-
data = Util::skipLine(data);
688-
continue;
689-
}
690-
size_t dbId = seqDbrAA.getId(dbKey);
691-
seqDbAa.mapSequence(dbId, dbKey, seqDbrAA.getData(dbId, thread_idx), seqDbrAA.getSeqLen(dbId));
692-
dbId = seqDbr3Di.getId(dbKey);
693-
seqDbSs.mapSequence(dbId, dbKey, seqDbr3Di.getData(dbId, thread_idx), seqDbr3Di.getSeqLen(dbId));
694-
AlnSimple aln;
695-
aln.queryId = queryKey;
696-
aln.targetId = dbKey;
697-
aln.score = structureSmithWaterman.ungapped_alignment(
698-
seqDbAa.numSequence,
699-
seqDbSs.numSequence,
700-
seqDbAa.L
701-
);
702-
threadAlnResults.push_back(aln);
673+
unsigned int dbKey = (unsigned int) strtoul(buffer, NULL, 10);
674+
memberDbKeys.push_back(dbKey);
703675
data = Util::skipLine(data);
704676
}
677+
678+
Debug(Debug::INFO) << "All-vs-all of " << memberDbKeys.size() << " structures (" <<
679+
memberDbKeys.size()*(memberDbKeys.size()+1)/2 - memberDbKeys.size() << " comparisons)\n";
680+
681+
for (size_t j = 0; j < memberDbKeys.size(); j++) {
682+
unsigned int queryKey = memberDbKeys[j];
683+
size_t queryId = seqDbrAA.getId(queryKey);
684+
seqQueryAa.mapSequence(queryId, queryKey, seqDbrAA.getData(queryId, thread_idx), seqDbrAA.getSeqLen(queryId));
685+
queryId = seqDbr3Di.getId(queryKey);
686+
seqQuerySs.mapSequence(queryId, queryKey, seqDbr3Di.getData(queryId, thread_idx), seqDbr3Di.getSeqLen(queryId));
687+
structureSmithWaterman.ssw_init(&seqQueryAa, &seqQuerySs, tinySubMatAA, tinySubMat3Di, subMat_aa);
688+
for (size_t k = j + 1; k < memberDbKeys.size(); k++) {
689+
unsigned int dbKey = memberDbKeys[k];
690+
size_t dbId = seqDbrAA.getId(dbKey);
691+
seqDbAa.mapSequence(dbId, dbKey, seqDbrAA.getData(dbId, thread_idx), seqDbrAA.getSeqLen(dbId));
692+
dbId = seqDbr3Di.getId(dbKey);
693+
seqDbSs.mapSequence(dbId, dbKey, seqDbr3Di.getData(dbId, thread_idx), seqDbr3Di.getSeqLen(dbId));
694+
AlnSimple aln;
695+
aln.queryId = queryKey;
696+
aln.targetId = dbKey;
697+
aln.score = structureSmithWaterman.ungapped_alignment(seqDbAa.numSequence, seqDbSs.numSequence, seqDbAa.L);
698+
threadAlnResults.push_back(aln);
699+
}
700+
}
705701
}
706702
#pragma omp critical
707703
{
@@ -1280,18 +1276,19 @@ int structuremsa(int argc, const char **argv, const Command& command, bool preCl
12801276
par.compBiasCorrection,
12811277
par.compBiasCorrectionScale
12821278
);
1279+
hits.insert(hits.end(), externalHits.begin(), externalHits.end());
12831280
// maybe a bit dangerous because memory of hits might be doubled
1284-
for (size_t i = 0; i < externalHits.size(); i++)
1285-
hits.push_back(externalHits[i]);
12861281
}
1282+
12871283
sortHitsByScore(hits);
12881284

12891285
Debug(Debug::INFO) << "Generating guide tree\n";
12901286
hits = mst(hits, sequenceCnt);
1287+
// assert(hits.size() == sequenceCnt - 1); // should be n-1 edges
12911288

12921289
Debug(Debug::INFO) << "Optimising merge order\n";
12931290
hits = reorderLinkage(hits, merges, sequenceCnt);
1294-
1291+
12951292
NewickParser::Node* root = NewickParser::buildTree(hits);
12961293
NewickParser::addNames(root, &qdbrH);
12971294
std::string nw = NewickParser::toNewick(root);

0 commit comments

Comments
 (0)