Skip to content

Commit

Permalink
Merge pull request #5 from will-rowe/inserts
Browse files Browse the repository at this point in the history
output primer scheme inserts as BED
  • Loading branch information
Will Rowe authored Aug 26, 2020
2 parents 33b4807 + 06c2cf3 commit 69ef9ec
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 41 deletions.
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)

# set a default build type if none was specified
set(COBS_DEFAULT_BUILD_TYPE "Release")
set(DEFAULT_BUILD_TYPE "Release")
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${COBS_DEFAULT_BUILD_TYPE}' as none was specified")
set(CMAKE_BUILD_TYPE "${COBS_DEFAULT_BUILD_TYPE}" CACHE
message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified")
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
STRING "Choose the type of build" FORCE)
# set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
Expand All @@ -34,7 +34,7 @@ project(ARTIC)
set(ARTIC_PROG_NAME artic-tools)
set(ARTIC_VERSION_MAJOR 0)
set(ARTIC_VERSION_MINOR 2)
set(ARTIC_VERSION_PATCH 2)
set(ARTIC_VERSION_PATCH 3)
configure_file (
"${PROJECT_SOURCE_DIR}/artic/version.hpp.in"
"${PROJECT_SOURCE_DIR}/artic/version.hpp"
Expand Down
18 changes: 18 additions & 0 deletions app/artic-tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ int main(int argc, char** argv)
getterCmd->add_option("-o,--outDir", outFileName, "The directory to write the scheme and reference sequence to");

// add validator options and flags
std::string insertFile;
validatorCmd->add_option("scheme", primerSchemeFile, "The primer scheme to validate")->required()->check(CLI::ExistingFile);
validatorCmd->add_option("--schemeVersion", primerSchemeVersion, "The ARTIC primer scheme version (default = 3)");
validatorCmd->add_option("-o,--outputPrimerSeqs", outFileName, "If provided, will write primer sequences as multiFASTA (requires --refSeq to be provided)");
validatorCmd->add_option("-r,--refSeq", refSeq, "The reference sequence for the primer scheme (FASTA format)");
validatorCmd->add_option("--outputInserts", insertFile, "If provided, will write primer scheme inserts as BED (exluding primer sequences)");

// add vcfFilter options and flags
std::string vcfIn;
Expand Down Expand Up @@ -108,6 +110,8 @@ int main(int argc, char** argv)
std::cout << "scheme ref. span:\t" << ps.GetRefStart() << "-" << ps.GetRefEnd() << std::endl;
float proportion = (float)ps.GetNumOverlaps() / (float)(ps.GetRefEnd() - ps.GetRefStart());
std::cout << "scheme overlaps:\t" << proportion * 100 << "%" << std::endl;

// provide primer sequences and insert sizes if requested
if (outFileName.size() != 0)
{
if (refSeq.size() == 0)
Expand All @@ -132,6 +136,20 @@ int main(int argc, char** argv)
fai_destroy(fai);
std::cout << "primer sequences:\t" << outFileName << std::endl;
}
if (insertFile.size() != 0)
{
std::ofstream fh;
fh.open(insertFile);
int counter = 1;
for (auto amplicon : ps.GetExpAmplicons())
{
auto poolID = amplicon.GetPrimerPoolID();
fh << ps.GetReferenceName() << "\t" << amplicon.GetForwardPrimer()->GetEnd() << "\t" << amplicon.GetReversePrimer()->GetStart() << "\t" << counter << "\t" << ps.GetPrimerPool(poolID) << "\t+" << std::endl;
counter++;
}
fh.close();
std::cout << "primer inserts:\t" << insertFile << std::endl;
}
});

// add the vcfFilter callback
Expand Down
8 changes: 7 additions & 1 deletion artic/primerScheme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,13 @@ int64_t artic::PrimerScheme::GetRefEnd(void) { return _refEnd; }
unsigned int artic::PrimerScheme::GetNumOverlaps(void) { return _ampliconOverlaps.count(); }

// GetExpAmplicons returns a vector to the amplicons the scheme expects to produce.
const std::vector<artic::Amplicon>& artic::PrimerScheme::GetExpAmplicons(void) const { return _expAmplicons; }
const std::vector<artic::Amplicon>& artic::PrimerScheme::GetExpAmplicons(void)
{
std::sort(_expAmplicons.begin(), _expAmplicons.end(), [](auto& lhs, auto& rhs) {
return lhs.GetForwardPrimer()->GetEnd() < rhs.GetForwardPrimer()->GetEnd();
});
return _expAmplicons;
}

// FindPrimers returns a primer pair with the nearest forward and reverse primer for a given segment start and end.
// Note: the primer pair may not be correctly paired, check using the IsProperlyPaired() method
Expand Down
2 changes: 1 addition & 1 deletion artic/primerScheme.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ namespace artic
unsigned int GetNumOverlaps(void);

// GetExpAmplicons returns a vector to the amplicons the scheme expects to produce.
const std::vector<Amplicon>& GetExpAmplicons(void) const;
const std::vector<Amplicon>& GetExpAmplicons(void);

// FindPrimers returns pointers to the nearest forward and reverse primer, given an alignment segment's start and end position.
Amplicon FindPrimers(int64_t segStart, int64_t segEnd);
Expand Down
78 changes: 43 additions & 35 deletions artic/primerSchemeDownloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,20 @@
#include "log.hpp"
#include "primerScheme.hpp"

// ARTIC scheme downloads
// Note: it would be nice to have all these in one place and under a unified naming system...
const std::vector<std::string> ebovUrls = {"https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V1/ZaireEbola.scheme.bed", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V2/ZaireEbola.scheme.bed", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V3/Ebov-10-Pan.bed"};
const std::vector<std::string> nivUrls = {"https://github.com/artic-network/primer-schemes/raw/master/Nipah/V1/NiV_6_Malaysia.bed"};
const std::vector<std::string> scovUrls = {"https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V1/nCoV-2019.scheme.bed", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V2/nCoV-2019.scheme.bed", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V3/nCoV-2019.scheme.bed"};
// Max version available for each ARTIC primer scheme
const uint maxEbolaVersion = 3;
const uint maxNipahVersion = 1;
const uint maxSARSCoV2Version = 3;

const std::vector<std::string> ebovRefUrls = {"https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V1/ZaireEbola.reference.fasta", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V2/ZaireEbola.reference.fasta", "https://github.com/artic-network/primer-schemes/raw/master/ZaireEbola/V3/Ebov-10-Pan.fasta"};
const std::vector<std::string> nivRefUrls = {"https://github.com/artic-network/primer-schemes/raw/master/Nipah/V1/NiV_6_Malaysia.fasta"};
const std::vector<std::string> scovRefUrls = {"https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V1/nCoV-2019.reference.fasta", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V2/nCoV-2019.reference.fasta", "https://github.com/artic-network/artic-ncov2019/raw/master/primer_schemes/nCoV-2019/V3/nCoV-2019.reference.fasta"};
// File helpers
const std::string schemeExt = ".primer.bed";
const std::string refExt = ".reference.fasta";
const std::string baseURL = "https://github.com/artic-network/primer-schemes/raw/master/";
const std::pair<std::string, std::string> ebovTags = {"ZaireEbola", "ZaireEbola"};
const std::pair<std::string, std::string> nivTags = {"Nipah", "NiV_6_Malaysia"};
const std::pair<std::string, std::string> scovTags = {"SARS-CoV-2", "nCoV-2019"};

// Scheme enum is used in the switch statement which constructs the download link
enum Scheme
{
InvalidScheme,
Expand All @@ -30,39 +34,43 @@ Scheme resolveScheme(std::string query)
{"ebola", Ebov},
{"nipah", Nipah},
{"scov2", Scov2},
{"ncov", Scov2}, // added for backward compatability
};
auto itr = schemeStrings.find(query);
if (itr != schemeStrings.end())
return itr->second;
return InvalidScheme;
}

std::string resolveLink(const std::string& tag1, unsigned int version, const std::string& tag2)
{
std::stringstream s;
s << baseURL << tag1 << "/V" << version << "/" << tag2;
return s.str();
}

// DownloadScheme will download a specified primer scheme and the reference sequence.
// if 0 is provided as a version, or an unknown version provided, the latest scheme version will be used.
void artic::DownloadScheme(const std::string& schemeName, unsigned int requestedVersion, const std::string& outDir)
{
std::string schemeURL;
std::string refURL;
unsigned int version = requestedVersion;
switch (resolveScheme(schemeName))
{
case Ebov:
if (version == 0 || version > ebovUrls.size())
version = ebovUrls.size();
schemeURL = ebovUrls.at(version - 1);
refURL = ebovRefUrls.at(version - 1);
if (version == 0 || version > maxEbolaVersion)
version = maxEbolaVersion;
schemeURL = resolveLink(ebovTags.first, version, ebovTags.second);
break;
case Nipah:
if (version == 0 || version > nivUrls.size())
version = nivUrls.size();
schemeURL = nivUrls.at(version - 1);
refURL = nivRefUrls.at(version - 1);
if (version == 0 || version > maxNipahVersion)
version = maxNipahVersion;
schemeURL = resolveLink(nivTags.first, version, nivTags.second);
break;
case Scov2:
if (version == 0 || version > scovUrls.size())
version = scovUrls.size();
schemeURL = scovUrls.at(version - 1);
refURL = scovRefUrls.at(version - 1);
if (version == 0 || version > maxSARSCoV2Version)
version = maxSARSCoV2Version;
schemeURL = resolveLink(scovTags.first, version, scovTags.second);
break;
default:
throw std::runtime_error("unknown scheme: " + schemeName);
Expand All @@ -77,36 +85,36 @@ void artic::DownloadScheme(const std::string& schemeName, unsigned int requested
LOG_WARN("requested latest scheme version, using v{}", version);

// create filenames
std::stringstream of1;
std::stringstream of2;
std::stringstream of;
if (outDir.size() != 0)
{
if (!boost::filesystem::is_directory(outDir) || !boost::filesystem::exists(outDir))
boost::filesystem::create_directory(outDir);
of1 << outDir << "/";
of2 << outDir << "/";
boost::filesystem::create_directories(outDir);
of << outDir << "/";
}
of1 << schemeName << ".v" << version << ".scheme.bed";
of2 << schemeName << ".v" << version << ".reference.fasta";
of << schemeName << ".v" << version;

// download using wget sys call
LOG_TRACE("downloading data")
std::stringstream baseCmd;
baseCmd << "wget -q -O ";
baseCmd << schemeName << ".v" << version;
std::string cmd1 = "wget -q -O " + of1.str() + " " + schemeURL + " 2>/dev/null";
std::string cmd2 = "wget -q -O " + of2.str() + " " + refURL + " 2>/dev/null";
baseCmd << "wget -q -O " << schemeName << ".v" << version;
std::string cmd1 = "wget -q -O " + of.str() + schemeExt + " " + schemeURL + schemeExt + " 2>/dev/null";
std::string cmd2 = "wget -q -O " + of.str() + refExt + " " + schemeURL + refExt + " 2>/dev/null";
int res;
res = system(cmd1.c_str());
if (res != EXIT_SUCCESS)
throw std::runtime_error("could not download scheme with wget");
res = system(cmd2.c_str());
if (res != EXIT_SUCCESS)
throw std::runtime_error("could not download reference with wget");
LOG_TRACE("\t{}", of1.str());
LOG_TRACE("\t{}", of2.str());
LOG_TRACE("\t{}{}", of.str(), schemeExt);
LOG_TRACE("\t{}{}", of.str(), refExt);

// validate the scheme
LOG_TRACE("checking scheme")
auto ps = artic::PrimerScheme(of1.str(), version);
auto ps = artic::PrimerScheme(of.str() + schemeExt, version);
LOG_TRACE("\t{}", ps.GetReferenceName());
LOG_TRACE("\t{} primer pools", ps.GetPrimerPools().size());
LOG_TRACE("\t{} primers (includes {} alts)", ps.GetNumPrimers(), ps.GetNumAlts());
LOG_TRACE("\t{} amplicons", ps.GetNumAmplicons());
}

0 comments on commit 69ef9ec

Please # to comment.