From e280379c052dde7a854ba39e47626a20c3960e39 Mon Sep 17 00:00:00 2001 From: Will Rowe Date: Wed, 26 Aug 2020 12:33:42 +0100 Subject: [PATCH 1/5] sorting amplicons before returning vector in ps method, adding opt to output primer inserts during scheme validator --- app/artic-tools.cpp | 18 ++++++++++++++++++ artic/primerScheme.cpp | 8 +++++++- artic/primerScheme.hpp | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/app/artic-tools.cpp b/app/artic-tools.cpp index e75cb95..7adec89 100644 --- a/app/artic-tools.cpp +++ b/app/artic-tools.cpp @@ -64,10 +64,12 @@ int main(int argc, char** argv) getterCmd->add_option("-o,--outDir", outFileName, "The directory to write the scheme and reference sequence to"); // add validator options and flags + std::string insertFile; validatorCmd->add_option("scheme", primerSchemeFile, "The primer scheme to validate")->required()->check(CLI::ExistingFile); validatorCmd->add_option("--schemeVersion", primerSchemeVersion, "The ARTIC primer scheme version (default = 3)"); validatorCmd->add_option("-o,--outputPrimerSeqs", outFileName, "If provided, will write primer sequences as multiFASTA (requires --refSeq to be provided)"); validatorCmd->add_option("-r,--refSeq", refSeq, "The reference sequence for the primer scheme (FASTA format)"); + validatorCmd->add_option("--outputInserts", insertFile, "If provided, will write primer scheme inserts as BED (exluding primer sequences)"); // add vcfFilter options and flags std::string vcfIn; @@ -108,6 +110,8 @@ int main(int argc, char** argv) std::cout << "scheme ref. span:\t" << ps.GetRefStart() << "-" << ps.GetRefEnd() << std::endl; float proportion = (float)ps.GetNumOverlaps() / (float)(ps.GetRefEnd() - ps.GetRefStart()); std::cout << "scheme overlaps:\t" << proportion * 100 << "%" << std::endl; + + // provide primer sequences and insert sizes if requested if (outFileName.size() != 0) { if (refSeq.size() == 0) @@ -132,6 +136,20 @@ int main(int argc, char** argv) fai_destroy(fai); std::cout << "primer sequences:\t" << outFileName << std::endl; } + if (insertFile.size() != 0) + { + std::ofstream fh; + fh.open(insertFile); + int counter = 1; + for (auto amplicon : ps.GetExpAmplicons()) + { + auto poolID = amplicon.GetPrimerPoolID(); + fh << ps.GetReferenceName() << "\t" << amplicon.GetForwardPrimer()->GetEnd() << "\t" << amplicon.GetReversePrimer()->GetStart() << "\t" << counter << "\t" << ps.GetPrimerPool(poolID) << "\t+" << std::endl; + counter++; + } + fh.close(); + std::cout << "primer inserts:\t" << insertFile << std::endl; + } }); // add the vcfFilter callback diff --git a/artic/primerScheme.cpp b/artic/primerScheme.cpp index c05f148..88cacae 100644 --- a/artic/primerScheme.cpp +++ b/artic/primerScheme.cpp @@ -173,7 +173,13 @@ int64_t artic::PrimerScheme::GetRefEnd(void) { return _refEnd; } unsigned int artic::PrimerScheme::GetNumOverlaps(void) { return _ampliconOverlaps.count(); } // GetExpAmplicons returns a vector to the amplicons the scheme expects to produce. -const std::vector& artic::PrimerScheme::GetExpAmplicons(void) const { return _expAmplicons; } +const std::vector& artic::PrimerScheme::GetExpAmplicons(void) +{ + std::sort(_expAmplicons.begin(), _expAmplicons.end(), [](auto& lhs, auto& rhs) { + return lhs.GetForwardPrimer()->GetEnd() < rhs.GetForwardPrimer()->GetEnd(); + }); + return _expAmplicons; +} // FindPrimers returns a primer pair with the nearest forward and reverse primer for a given segment start and end. // Note: the primer pair may not be correctly paired, check using the IsProperlyPaired() method diff --git a/artic/primerScheme.hpp b/artic/primerScheme.hpp index 1ad8694..438432a 100644 --- a/artic/primerScheme.hpp +++ b/artic/primerScheme.hpp @@ -118,7 +118,7 @@ namespace artic unsigned int GetNumOverlaps(void); // GetExpAmplicons returns a vector to the amplicons the scheme expects to produce. - const std::vector& GetExpAmplicons(void) const; + const std::vector& GetExpAmplicons(void); // FindPrimers returns pointers to the nearest forward and reverse primer, given an alignment segment's start and end position. Amplicon FindPrimers(int64_t segStart, int64_t segEnd); From d899b2ee603159d4fca343c6c2596483df3d28e7 Mon Sep 17 00:00:00 2001 From: Will Rowe Date: Wed, 26 Aug 2020 16:01:19 +0100 Subject: [PATCH 2/5] updating endpoint for primer schemes, adding alias for ncov, cleaning up code --- artic/primerSchemeDownloader.cpp | 76 ++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/artic/primerSchemeDownloader.cpp b/artic/primerSchemeDownloader.cpp index 7733c20..967d782 100644 --- a/artic/primerSchemeDownloader.cpp +++ b/artic/primerSchemeDownloader.cpp @@ -6,16 +6,20 @@ #include "log.hpp" #include "primerScheme.hpp" -// ARTIC scheme downloads -// Note: it would be nice to have all these in one place and under a unified naming system... -const std::vector ebovUrls = {"https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V1/ZaireEbola.scheme.bed", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V2/ZaireEbola.scheme.bed", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V3/Ebov-10-Pan.bed"}; -const std::vector nivUrls = {"https://github.com/artic-network/primer-schemes/raw/master/Nipah/V1/NiV_6_Malaysia.bed"}; -const std::vector scovUrls = {"https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V1/nCoV-2019.scheme.bed", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V2/nCoV-2019.scheme.bed", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V3/nCoV-2019.scheme.bed"}; +// Max version available for each ARTIC primer scheme +const uint maxEbolaVersion = 3; +const uint maxNipahVersion = 1; +const uint maxSARSCoV2Version = 3; -const std::vector ebovRefUrls = {"https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V1/ZaireEbola.reference.fasta", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V2/ZaireEbola.reference.fasta", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V3/Ebov-10-Pan.fasta"}; -const std::vector nivRefUrls = {"https://github.com/artic-network/primer-schemes/raw/master/Nipah/V1/NiV_6_Malaysia.fasta"}; -const std::vector scovRefUrls = {"https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V1/nCoV-2019.reference.fasta", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V2/nCoV-2019.reference.fasta", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V3/nCoV-2019.reference.fasta"}; +// File helpers +const std::string schemeExt = ".primer.bed"; +const std::string refExt = ".reference.fasta"; +const std::string baseURL = "https://github.com/artic-network/primer-schemes/raw/master/"; +const std::pair ebovTags = {"ZaireEbola", "ZaireEbola"}; +const std::pair nivTags = {"Nipah", "NiV_6_Malaysia"}; +const std::pair scovTags = {"SARS-CoV-2", "nCoV-2019"}; +// Scheme enum is used in the switch statement which constructs the download link enum Scheme { InvalidScheme, @@ -30,6 +34,7 @@ Scheme resolveScheme(std::string query) {"ebola", Ebov}, {"nipah", Nipah}, {"scov2", Scov2}, + {"ncov", Scov2}, // added for backward compatability }; auto itr = schemeStrings.find(query); if (itr != schemeStrings.end()) @@ -37,32 +42,35 @@ Scheme resolveScheme(std::string query) return InvalidScheme; } +std::string resolveLink(const std::string& tag1, unsigned int version, const std::string& tag2) +{ + std::stringstream s; + s << baseURL << tag1 << "/V" << version << "/" << tag2; + return s.str(); +} + // DownloadScheme will download a specified primer scheme and the reference sequence. // if 0 is provided as a version, or an unknown version provided, the latest scheme version will be used. void artic::DownloadScheme(const std::string& schemeName, unsigned int requestedVersion, const std::string& outDir) { std::string schemeURL; - std::string refURL; unsigned int version = requestedVersion; switch (resolveScheme(schemeName)) { case Ebov: - if (version == 0 || version > ebovUrls.size()) - version = ebovUrls.size(); - schemeURL = ebovUrls.at(version - 1); - refURL = ebovRefUrls.at(version - 1); + if (version == 0 || version > maxEbolaVersion) + version = maxEbolaVersion; + schemeURL = resolveLink(ebovTags.first, version, ebovTags.second); break; case Nipah: - if (version == 0 || version > nivUrls.size()) - version = nivUrls.size(); - schemeURL = nivUrls.at(version - 1); - refURL = nivRefUrls.at(version - 1); + if (version == 0 || version > maxNipahVersion) + version = maxNipahVersion; + schemeURL = resolveLink(nivTags.first, version, nivTags.second); break; case Scov2: - if (version == 0 || version > scovUrls.size()) - version = scovUrls.size(); - schemeURL = scovUrls.at(version - 1); - refURL = scovRefUrls.at(version - 1); + if (version == 0 || version > maxSARSCoV2Version) + version = maxSARSCoV2Version; + schemeURL = resolveLink(scovTags.first, version, scovTags.second); break; default: throw std::runtime_error("unknown scheme: " + schemeName); @@ -77,25 +85,21 @@ void artic::DownloadScheme(const std::string& schemeName, unsigned int requested LOG_WARN("requested latest scheme version, using v{}", version); // create filenames - std::stringstream of1; - std::stringstream of2; + std::stringstream of; if (outDir.size() != 0) { if (!boost::filesystem::is_directory(outDir) || !boost::filesystem::exists(outDir)) boost::filesystem::create_directory(outDir); - of1 << outDir << "/"; - of2 << outDir << "/"; + of << outDir << "/"; } - of1 << schemeName << ".v" << version << ".scheme.bed"; - of2 << schemeName << ".v" << version << ".reference.fasta"; + of << schemeName << ".v" << version; // download using wget sys call LOG_TRACE("downloading data") std::stringstream baseCmd; - baseCmd << "wget -q -O "; - baseCmd << schemeName << ".v" << version; - std::string cmd1 = "wget -q -O " + of1.str() + " " + schemeURL + " 2>/dev/null"; - std::string cmd2 = "wget -q -O " + of2.str() + " " + refURL + " 2>/dev/null"; + baseCmd << "wget -q -O " << schemeName << ".v" << version; + std::string cmd1 = "wget -q -O " + of.str() + schemeExt + " " + schemeURL + schemeExt + " 2>/dev/null"; + std::string cmd2 = "wget -q -O " + of.str() + refExt + " " + schemeURL + refExt + " 2>/dev/null"; int res; res = system(cmd1.c_str()); if (res != EXIT_SUCCESS) @@ -103,10 +107,14 @@ void artic::DownloadScheme(const std::string& schemeName, unsigned int requested res = system(cmd2.c_str()); if (res != EXIT_SUCCESS) throw std::runtime_error("could not download reference with wget"); - LOG_TRACE("\t{}", of1.str()); - LOG_TRACE("\t{}", of2.str()); + LOG_TRACE("\t{}{}", of.str(), schemeExt); + LOG_TRACE("\t{}{}", of.str(), refExt); // validate the scheme LOG_TRACE("checking scheme") - auto ps = artic::PrimerScheme(of1.str(), version); + auto ps = artic::PrimerScheme(of.str() + schemeExt, version); + LOG_TRACE("\t{}", ps.GetReferenceName()); + LOG_TRACE("\t{} primer pools", ps.GetPrimerPools().size()); + LOG_TRACE("\t{} primers (includes {} alts)", ps.GetNumPrimers(), ps.GetNumAlts()); + LOG_TRACE("\t{} amplicons", ps.GetNumAmplicons()); } From 0069a12e20737806b1ad977912964e1addecc5d1 Mon Sep 17 00:00:00 2001 From: Will Rowe Date: Wed, 26 Aug 2020 16:03:48 +0100 Subject: [PATCH 3/5] allowing creation of multiple nested dirs --- artic/primerSchemeDownloader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artic/primerSchemeDownloader.cpp b/artic/primerSchemeDownloader.cpp index 967d782..ee84e91 100644 --- a/artic/primerSchemeDownloader.cpp +++ b/artic/primerSchemeDownloader.cpp @@ -89,7 +89,7 @@ void artic::DownloadScheme(const std::string& schemeName, unsigned int requested if (outDir.size() != 0) { if (!boost::filesystem::is_directory(outDir) || !boost::filesystem::exists(outDir)) - boost::filesystem::create_directory(outDir); + boost::filesystem::create_directories(outDir); of << outDir << "/"; } of << schemeName << ".v" << version; From afd022cc667896c47288898b748220d63952442b Mon Sep 17 00:00:00 2001 From: Will Rowe Date: Wed, 26 Aug 2020 16:04:28 +0100 Subject: [PATCH 4/5] patching version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0edf6d4..28873db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ project(ARTIC) set(ARTIC_PROG_NAME artic-tools) set(ARTIC_VERSION_MAJOR 0) set(ARTIC_VERSION_MINOR 2) -set(ARTIC_VERSION_PATCH 2) +set(ARTIC_VERSION_PATCH 3) configure_file ( "${PROJECT_SOURCE_DIR}/artic/version.hpp.in" "${PROJECT_SOURCE_DIR}/artic/version.hpp" From 06c2cf32485fa78c6e65bb312087a4bcfbd18b39 Mon Sep 17 00:00:00 2001 From: Will Rowe Date: Wed, 26 Aug 2020 16:09:26 +0100 Subject: [PATCH 5/5] patching version --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28873db..6a0b286 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,10 +9,10 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin) # set a default build type if none was specified -set(COBS_DEFAULT_BUILD_TYPE "Release") +set(DEFAULT_BUILD_TYPE "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${COBS_DEFAULT_BUILD_TYPE}' as none was specified") - set(CMAKE_BUILD_TYPE "${COBS_DEFAULT_BUILD_TYPE}" CACHE + message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified") + set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "Choose the type of build" FORCE) # set the possible values of build type for cmake-gui set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS