Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try removing all references to obligatory conda environment #209

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions src/main/kotlin/net/maizegenetics/phgv2/cli/AgcCompress.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import com.github.ajalt.clikt.core.CliktCommand
import com.github.ajalt.clikt.parameters.options.default
import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.options.required
import com.github.ajalt.clikt.parameters.types.boolean
import net.maizegenetics.phgv2.utils.condaPrefix
import net.maizegenetics.phgv2.utils.verifyURI
import org.apache.logging.log4j.LogManager
import java.io.File
Expand Down Expand Up @@ -60,6 +62,10 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
val condaEnvPrefix by option (help = "Prefix for the conda environment to use. If provided, this should be the full path to the conda environment.")
.default("")

// Boolean-valued command-line option (converted with .boolean()); evaluates to true when the option is not supplied.
// run() forwards this value, together with condaEnvPrefix, to verifyURI and processAGCFiles.
val condaEnvNeeded by option (help = "Flag to indicate if a conda environment is needed.")
.boolean()
.default(true)


override fun run() {
myLogger.info("Starting AGC compression: validate the URI")
Expand All @@ -71,13 +77,13 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
}
// Verify the dbPath contains valid tiledb created datasets
// If it doesn't, an exception will be thrown
val validDB = verifyURI(tiledbFolder,"hvcf_dataset",condaEnvPrefix)
val validDB = verifyURI(tiledbFolder,"hvcf_dataset",condaEnvPrefix, condaEnvNeeded)
// process the input
processAGCFiles(tiledbFolder,fastaList,referenceFile, condaEnvPrefix)
processAGCFiles(tiledbFolder,fastaList,referenceFile, condaEnvPrefix, condaEnvNeeded)

}

fun processAGCFiles(dbPath:String, fastaList:String, refFasta:String, condaEnvPrefix:String) {
fun processAGCFiles(dbPath:String, fastaList:String, refFasta:String, condaEnvPrefix:String, condaEnvNeeded:Boolean) {

val tempDir = "${dbPath}/temp"
File(tempDir).mkdirs()
Expand All @@ -90,7 +96,7 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
// if it exists, check if the fasta files in the fastaList/fastaFiles are already in the agc file
// Print a list of the duplicates, add the non-duplicates to the compressed file.
val duplicateList =
compareNewExistingSampleNames("${dbPath}/assemblies.agc", getCurrentSampleNames(fastaFiles),tempDir,condaEnvPrefix)
compareNewExistingSampleNames("${dbPath}/assemblies.agc", getCurrentSampleNames(fastaFiles),tempDir,condaEnvPrefix, condaEnvNeeded)
if (duplicateList.isNotEmpty()) {
myLogger.info("The following fasta files are already represented in the AGC compressed file and will not be loaded: ${duplicateList}")
}
Expand All @@ -108,7 +114,7 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
myLogger.info("VerifyFileAnnotation: time: " + (System.nanoTime() - startTime).toDouble() / 1_000_000_000.0 + " secs.")
// Call method to load AGC files with the list of fasta files and load option
myLogger.info("calling loadAGCFiles")
val success = loadAGCFiles(fileToLoad.toString(), "append", dbPath, refFasta,tempDir, condaEnvPrefix)
val success = loadAGCFiles(fileToLoad.toString(), "append", dbPath, refFasta,tempDir, condaEnvPrefix, condaEnvNeeded)
} else {
myLogger.info("No new fasta files to load -returning")
}
Expand All @@ -120,7 +126,7 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
// print out time it took to verify the fasta files in seconds
myLogger.info("VerifyFileAnnotation: time: " + (System.nanoTime() - startTime).toDouble() / 1_000_000_000.0 + " secs.")
myLogger.info("calling loadAGCFiles")
val success = loadAGCFiles(fastaList, "create",dbPath,refFasta, tempDir, condaEnvPrefix)
val success = loadAGCFiles(fastaList, "create",dbPath,refFasta, tempDir, condaEnvPrefix, condaEnvNeeded)
}

}
Expand Down Expand Up @@ -165,25 +171,23 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
}


fun loadAGCFiles(fastaFiles: String, loadOption: String, dbPath:String, refFasta:String, tempDir:String, condaEnvPrefix:String): Boolean {
fun loadAGCFiles(fastaFiles: String, loadOption: String, dbPath:String, refFasta:String, tempDir:String, condaEnvPrefix:String, condaEnvNeeded: Boolean): Boolean {

val agcFile = "${dbPath}/assemblies.agc"
val agcFileOut = "${dbPath}/assemblies_tmp.agc"
val builder = ProcessBuilder()
var redirectOutput = tempDir + "/agc_create_output.log"
var redirectError = tempDir + "/agc_create_error.log"
val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix) else mutableListOf("conda","run","-n","phgv2-conda")
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded)
when (loadOption) {
"create" -> {
val createCommand = listOf("agc","create","-i",fastaFiles,"-o",agcFile,refFasta)
command.addAll(createCommand)
//builder.command("conda","run","-n","phgv2-conda","agc","create","-i",fastaFiles,"-o",agcFile,refFasta)
builder.command(command)
}
"append" -> {
val appendCommand = listOf("agc","append","-i",fastaFiles,agcFile,"-o",agcFileOut)
command.addAll(appendCommand)
//builder.command("conda","run","-n","phgv2-conda","agc","append","-i",fastaFiles,agcFile,"-o",agcFileOut)
builder.command(command)
redirectOutput = tempDir + "/agc_append_output.log"
redirectError = tempDir + "/agc_append_error.log"
Expand Down Expand Up @@ -239,15 +243,14 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
return sampleNames
}

fun getSampleListFromAGC(agcFile:String,tempDir:String,condaEnvPrefix:String): List<String> {
fun getSampleListFromAGC(agcFile:String,tempDir:String,condaEnvPrefix:String,condaEnvNeeded: Boolean): List<String> {
// This function will return a list of samples from the AGC compressed file.
// This will be used to verify that the new list of fastas has nothing overlapping
// the existing fastas in the AGC compressed file.

var sampleList = listOf<String>()
// Query the agc compressed file to get list of sample names
val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix,"agc","listset",agcFile) else mutableListOf("conda","run","-n","phgv2-conda","agc","listset",agcFile)
//var builder = ProcessBuilder("conda","run","-n","phgv2-conda","agc","listset",agcFile)
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded) + mutableListOf("agc","listset",agcFile)
var builder = ProcessBuilder(command)
var redirectOutput = tempDir + "/agc_create_output.log"
var redirectError = tempDir + "/agc_create_error.log"
Expand Down Expand Up @@ -276,13 +279,13 @@ class AgcCompress : CliktCommand(help = "Create a single AGC compressed file fro
// This function will take a list of fasta names compiled from the input fasta
// files and compare them to those names already in the agc compressed file.
// It returns a list of any duplicates
fun compareNewExistingSampleNames(agcFile:String, newFastas:List<String>, tempDir:String,condaEnvPrefix:String): List<String> {
fun compareNewExistingSampleNames(agcFile:String, newFastas:List<String>, tempDir:String,condaEnvPrefix:String, condaEnvNeeded: Boolean): List<String> {
// Need to process the newFastas list to just the name without extension or path
// that is what is stored as the sample name in the AGC compressed file.
val newSampleNames = getCurrentSampleNames(newFastas)

// Query the agc compressed file to get list of sample names
val duplicateList = getSampleListFromAGC(agcFile,tempDir,condaEnvPrefix).intersect(newSampleNames).toList()
val duplicateList = getSampleListFromAGC(agcFile,tempDir,condaEnvPrefix,condaEnvNeeded).intersect(newSampleNames).toList()

// Match the duplicateList to the newFastas list to get the full path and extension
// of the fasta files that are duplicates
Expand Down
93 changes: 15 additions & 78 deletions src/main/kotlin/net/maizegenetics/phgv2/cli/AlignAssemblies.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ import com.github.ajalt.clikt.parameters.groups.mutuallyExclusiveOptions
import com.github.ajalt.clikt.parameters.groups.required
import com.github.ajalt.clikt.parameters.groups.single
import com.github.ajalt.clikt.parameters.options.*
import com.github.ajalt.clikt.parameters.types.boolean
import com.github.ajalt.clikt.parameters.types.int
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withContext
import net.maizegenetics.phgv2.utils.condaPrefix
import org.apache.logging.log4j.LogManager
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.toMap
Expand Down Expand Up @@ -162,6 +164,10 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files
val condaEnvPrefix by option (help = "Prefix for the conda environment to use. If provided, this should be the full path to the conda environment.")
.default("")

// Boolean-valued command-line option (converted with .boolean()); evaluates to true when the option is not supplied.
// It is passed, together with condaEnvPrefix, to processRefFiles when no reference CDS fasta was provided.
val condaEnvNeeded by option (help = "Flag to indicate if a conda environment is needed.")
.boolean()
.default(true)

data class InputChannelData(
val refFasta: String,
val asmFasta: String,
Expand All @@ -188,7 +194,7 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files

// If referenceCdsFasta and referenceSam are not provided, we need to create them
if (referenceCdsFasta == "" ) {
anchorwaveRefFiles = processRefFiles(referenceFile, gff, outputDir, runsAndThreads, condaEnvPrefix)
anchorwaveRefFiles = processRefFiles(referenceFile, gff, outputDir, runsAndThreads, condaEnvPrefix, condaEnvNeeded)
}

val cdsFasta = anchorwaveRefFiles.first
Expand All @@ -205,19 +211,17 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files
}

fun processRefFiles( referenceFile:String, gff:String, outputDir:String,
runsAndThreads:Pair<Int, Int>, condaEnvPrefix:String): Pair<String,String>{
runsAndThreads:Pair<Int, Int>, condaEnvPrefix:String, condaEnvNeeded:Boolean): Pair<String,String>{

val cdsFasta = "$outputDir/ref.cds.fasta"
createCDSfromRefData(referenceFile, gff, cdsFasta, outputDir,condaEnvPrefix)
createCDSfromRefData(referenceFile, gff, cdsFasta, outputDir,condaEnvPrefix, condaEnvNeeded)

// run minimap2 for ref to refcds
val justNameRef = File(referenceFile).nameWithoutExtension
val samOutFile = "${justNameRef}.sam"
val refSamOutFile = "${outputDir}/${samOutFile}"

val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix, "minimap2", "-x", "splice", "-t", runsAndThreads.second.toString(), "-k", "12",
"-a", "-p", "0.4", "-N20", referenceFile, cdsFasta, "-o", refSamOutFile)
else mutableListOf("conda","run","-n","phgv2-conda","minimap2", "-x", "splice", "-t", runsAndThreads.second.toString(), "-k", "12",
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded) + mutableListOf("minimap2", "-x", "splice", "-t", runsAndThreads.second.toString(), "-k", "12",
"-a", "-p", "0.4", "-N20", referenceFile, cdsFasta, "-o", refSamOutFile)
val builder = ProcessBuilder(command)

Expand Down Expand Up @@ -431,27 +435,12 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files
}
}

private fun createCDSfromRefData(refFasta: String, gffFile: String, cdsFasta: String, outputDir: String, condaEnvPrefix:String): Boolean {
private fun createCDSfromRefData(refFasta: String, gffFile: String, cdsFasta: String, outputDir: String, condaEnvPrefix:String, condaEnvNeeded: Boolean): Boolean {

// val command = "anchorwave gff2seq -r ${refFasta} -i ${gffFile} -o ${cdsFasta} "
// Need to set the conda environment here to access anchorwave

val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix, "anchorwave",
"gff2seq",
"-r",
refFasta,
"-i",
gffFile,
"-o",
cdsFasta)
else mutableListOf("conda","run","-n","phgv2-conda","anchorwave",
"gff2seq",
"-r",
refFasta,
"-i",
gffFile,
"-o",
cdsFasta)
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded) + mutableListOf("anchorwave", "gff2seq", "-r", refFasta, "-i", gffFile, "-o", cdsFasta)
val builder = ProcessBuilder(command)

val redirectOutput = "$outputDir/anchorwave_gff2seq_output.log"
Expand Down Expand Up @@ -532,36 +521,8 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files

myLogger.info("alignAssembly: asmFileFull: ${assemblyEntry.asmFasta}, outputFile: $asmSamFile , threadsPerRun: ${assemblyEntry.threadsPerRun}")

val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix,"minimap2",
"-x",
"splice",
"-t",
assemblyEntry.threadsPerRun.toString(),
"-k",
"12",
"-a",
"-p",
"0.4",
"-N20",
assemblyEntry.asmFasta,
cdsFasta,
"-o",
asmSamFile)
else mutableListOf("conda","run","-n","phgv2-conda","minimap2",
"-x",
"splice",
"-t",
assemblyEntry.threadsPerRun.toString(),
"-k",
"12",
"-a",
"-p",
"0.4",
"-N20",
assemblyEntry.asmFasta,
cdsFasta,
"-o",
asmSamFile)
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded) + mutableListOf("minimap2", "-x", "splice", "-t", assemblyEntry.threadsPerRun.toString(),
"-k", "12", "-a", "-p", "0.4", "-N20", assemblyEntry.asmFasta, cdsFasta, "-o", asmSamFile)
val builder = ProcessBuilder(command)
val redirectError = "${assemblyEntry.outputDir}/minimap2_${justName}_error.log"
val redirectOutput = "${assemblyEntry.outputDir}/minimap2_${justName}_output.log"
Expand Down Expand Up @@ -607,31 +568,7 @@ class AlignAssemblies : CliktCommand(help = "Align prepared assembly fasta files
// a GVCF keyfile from the maf keyfiles. It will be understood that the maf
// file name is <assemblyFastaNoExtension>.maf
val outputFile = "${outputDir}/${justNameAsm}.maf"
val command = if (condaEnvPrefix.isNotBlank()) mutableListOf("conda","run","-p",condaEnvPrefix,"anchorwave",
"proali",
"-i",
gffFile,
"-r",
refFasta,
"-as",
cdsFasta,
"-a",
asmSam,
"-ar",
refSam,
"-s",
asmFasta,
"-n",
anchorsproFile,
"-R",
refMaxAlignCov.toString(),
"-Q",
queryMaxAlignCov.toString(),
"-t",
threadsPerRun,
"-o",
outputFile)
else mutableListOf("conda","run","-n","phgv2-conda","anchorwave",
val command = condaPrefix(condaEnvPrefix, condaEnvNeeded) + mutableListOf("anchorwave",
"proali",
"-i",
gffFile,
Expand Down
Loading