Skip to content

Commit

Permalink
modernize WDL
Browse files (browse the repository at this point in the history)
  • Loading branch information
mlin committed Aug 4, 2020
1 parent 606ba80 commit 0ac63a1
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 51 deletions.
106 changes: 61 additions & 45 deletions spvcf.wdl
Original file line number Diff line number Diff line change
@@ -1,33 +1,40 @@
version 1.0

task spvcf_encode {
File vcf_gz
Boolean multithread = false
String release = "v1.0.0"
input {
File vcf_gz
Boolean multithread = false
String release = "v1.1.0"
Int cpu = if multithread then 8 else 4
}

parameter_meta {
vcf_gz: "stream"
}

command {
set -ex -o pipefail
command <<<
set -euxo pipefail

apt-get update -qq && apt-get install -y -qq pigz wget
wget -nv https://github.com/mlin/spVCF/releases/download/${release}/spvcf
wget -nv https://github.com/dnanexus-rnd/GLnexus/raw/master/cli/dxapplet/resources/usr/local/bin/bgzip
chmod +x spvcf bgzip
apt-get -qq update && apt-get install -y wget tabix
wget -nv https://github.com/mlin/spVCF/releases/download/~{release}/spvcf
chmod +x spvcf

threads_arg=""
if [ "${multithread}" == "true" ]; then
threads_arg="--threads $(nproc)"
if [ "~{multithread}" == "true" ]; then
threads_arg="--threads 4"
fi

nm=$(basename "${vcf_gz}" .vcf.gz)
nm="$nm.spvcf.gz"
nm=$(basename "~{vcf_gz}" .vcf.gz)
nm="${nm}.spvcf.gz"
mkdir out
pigz -dc "${vcf_gz}" | ./spvcf encode $threads_arg | ./bgzip -@ $(nproc) > "out/$nm"
}
bgzip -dc "~{vcf_gz}" | ./spvcf encode $threads_arg | bgzip -@ 4 > "out/${nm}"
>>>

runtime {
docker: "ubuntu:18.04"
docker: "ubuntu:20.04"
cpu: cpu
memory: "~{cpu} GB"
disks: "local-disk ~{ceil(size(vcf_gz,'GB'))} SSD"
}

output {
Expand All @@ -36,29 +43,33 @@ task spvcf_encode {
}

task spvcf_decode {
File spvcf_gz
String release = "v1.0.0"
input {
File spvcf_gz
String release = "v1.1.0"
}

parameter_meta {
spvcf_gz: "stream"
}

command {
set -ex -o pipefail
command <<<
set -euxo pipefail

apt-get update -qq && apt-get install -y -qq pigz wget
wget -nv https://github.com/mlin/spVCF/releases/download/${release}/spvcf
wget -nv https://github.com/dnanexus-rnd/GLnexus/raw/master/cli/dxapplet/resources/usr/local/bin/bgzip
chmod +x spvcf bgzip
apt-get -qq update && apt-get install -y wget tabix
wget -nv https://github.com/mlin/spVCF/releases/download/~{release}/spvcf
chmod +x spvcf

nm=$(basename "${spvcf_gz}" .spvcf.gz)
nm="$nm.vcf.gz"
nm=$(basename "~{spvcf_gz}" .spvcf.gz)
nm="${nm}.vcf.gz"
mkdir out
pigz -dc "${spvcf_gz}" | ./spvcf decode | ./bgzip -@ $(nproc) > "out/$nm"
}
bgzip -dc "~{spvcf_gz}" | ./spvcf decode | bgzip -@ 4 > "out/${nm}"
>>>

runtime {
docker: "ubuntu:18.04"
docker: "ubuntu:20.04"
cpu: 4
memory: "4 GB"
disks: "local-disk ~{10*ceil(size(spvcf_gz,'GB'))} SSD"
}

output {
Expand All @@ -67,35 +78,40 @@ task spvcf_decode {
}

task spvcf_squeeze {
File vcf_gz
Boolean multithread = false
String release = "v1.0.0"
input {
File vcf_gz
Boolean multithread = false
String release = "v1.1.0"
Int cpu = if multithread then 8 else 4
}

parameter_meta {
vcf_gz: "stream"
}

command {
set -ex -o pipefail
command <<<
set -euxo pipefail

apt-get update -qq && apt-get install -y -qq pigz wget
wget -nv https://github.com/mlin/spVCF/releases/download/${release}/spvcf
wget -nv https://github.com/dnanexus-rnd/GLnexus/raw/master/cli/dxapplet/resources/usr/local/bin/bgzip
chmod +x spvcf bgzip
apt-get -qq update && apt-get install -y wget tabix
wget -nv https://github.com/mlin/spVCF/releases/download/~{release}/spvcf
chmod +x spvcf

threads_arg=""
if [ "${multithread}" == "true" ]; then
threads_arg="--threads $(nproc)"
if [ "~{multithread}" == "true" ]; then
threads_arg="--threads 4"
fi

nm=$(basename "${vcf_gz}" .vcf.gz)
nm="$nm.squeeze.vcf.gz"
nm=$(basename "~{vcf_gz}" .vcf.gz)
nm="${nm}.squeeze.vcf.gz"
mkdir out
pigz -dc "${vcf_gz}" | ./spvcf squeeze $threads_arg | ./bgzip -@ $(nproc) > "out/$nm"
}
bgzip -dc "~{vcf_gz}" | ./spvcf squeeze $threads_arg | bgzip -@ 4 > "out/${nm}"
>>>

runtime {
docker: "ubuntu:18.04"
docker: "ubuntu:20.04"
cpu: cpu
memory: "~{cpu} GB"
disks: "local-disk ~{ceil(size(vcf_gz,'GB'))} SSD"
}

output {
Expand Down
26 changes: 20 additions & 6 deletions test/test_spvcf.wdl
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
version 1.0

import "spvcf.wdl" as tasks

workflow test_spvcf {
File vcf_gz # pVCF
input {
File vcf_gz # pVCF
}

# spVCF-encode the pVCF
call tasks.spvcf_encode {
Expand Down Expand Up @@ -37,11 +41,21 @@ workflow test_spvcf {
}

task verify_identical_gz_content {
File gz1
File gz2
input {
File gz1
File gz2
}

command {
set -e -o pipefail
cmp --silent <(gzip -dc "${gz1}") <(gzip -dc "${gz2}")
command <<<
set -euxo pipefail
apt-get -qq update && apt-get install -y tabix
cmp --silent <(bgzip -dc "~{gz1}") <(bgzip -dc "~{gz2}")
>>>

runtime {
docker: "ubuntu:20.04"
cpu: 4
memory: "4 GB"
disks: "local-disk ~{ceil(size(gz2,'GB')+size(gz1,'GiB'))+4} SSD"
}
}

0 comments on commit 0ac63a1

Please sign in to comment.