@@ -457,22 +457,46 @@ size_t NcbiTaxonomy::loadMerged(const std::string &mergedFile) {
457
457
EXIT (EXIT_FAILURE);
458
458
}
459
459
460
+ std::unordered_map<TaxID, TaxID> mergedMap;
461
+ TaxID localMaxTaxID = maxTaxID;
460
462
std::string line;
461
- size_t count = 0 ;
462
463
while (std::getline (ss, line)) {
463
464
std::vector<std::string> result = splitByDelimiter (line, " \t |\t " , 2 );
464
465
if (result.size () != 2 ) {
465
466
Debug (Debug::ERROR) << " Invalid name entry!\n " ;
466
467
EXIT (EXIT_FAILURE);
467
468
}
468
469
469
- unsigned int oldId = (unsigned int )strtoul (result[0 ].c_str (), NULL , 10 );
470
- unsigned int mergedId = (unsigned int )strtoul (result[1 ].c_str (), NULL , 10 );
470
+ TaxID oldId = (TaxID) strtoul (result[0 ].c_str (), NULL , 10 );
471
+ TaxID mergedId = (TaxID) strtoul (result[1 ].c_str (), NULL , 10 );
472
+
473
+ // Only update if the oldId doesn't exist yet AND the mergedId does exist
471
474
if (!nodeExists (oldId) && nodeExists (mergedId)) {
472
- D[oldId] = D[mergedId];
473
- ++count;
475
+ if (oldId > localMaxTaxID) {
476
+ localMaxTaxID = oldId;
477
+ }
478
+ if (mergedId > localMaxTaxID) {
479
+ localMaxTaxID = mergedId;
480
+ }
481
+ mergedMap[oldId] = mergedId;
474
482
}
475
483
}
484
+
485
+ // realloc D if we find a higher maxTaxID
486
+ if (localMaxTaxID > maxTaxID) {
487
+ int * newD = new int [localMaxTaxID + 1 ];
488
+ std::copy (D, D + maxTaxID + 1 , newD);
489
+ std::fill (newD + maxTaxID + 1 , newD + (localMaxTaxID + 1 ), -1 );
490
+ delete[] D;
491
+ D = newD;
492
+ maxTaxID = localMaxTaxID;
493
+ }
494
+
495
+ size_t count = 0 ;
496
+ for (std::unordered_map<TaxID, TaxID>::iterator it = mergedMap.begin (); it != mergedMap.end (); ++it) {
497
+ D[it->first ] = D[it->second ];
498
+ ++count;
499
+ }
476
500
Debug (Debug::INFO) << " Done, added " << count << " merged nodes.\n " ;
477
501
return count;
478
502
}
0 commit comments