Skip to content

Commit

Permalink
Merge branch 'main' into phlat
Browse files Browse the repository at this point in the history
  • Loading branch information
Layth17 authored Aug 5, 2022
2 parents 4ce75af + cfe8d0d commit 493191d
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 77 deletions.
2 changes: 1 addition & 1 deletion definitions/detect_variants_wgs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ workflow detectVariantsWgs {
String vep_ensembl_species
File? synonyms_file
Boolean? annotate_coding_only
String vep_pick # enum ["pick", "flag_pick", "pick_allele", "per_gene", "pick_allele_gene", "flag_pick_allele", "flag_pick_allele_gene"]
String? vep_pick # enum ["pick", "flag_pick", "pick_allele", "per_gene", "pick_allele_gene", "flag_pick_allele", "flag_pick_allele_gene"]
Array[String] vep_plugins = ["Frameshift", "Wildtype"]

String gnomad_field_name = "gnomADe_AF" # only change with gnomad_filter_annotation
Expand Down
186 changes: 117 additions & 69 deletions definitions/somatic_wgs.wdl
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
version 1.0

import "alignment_wgs.wdl" as aw
import "subworkflows/sequence_to_bqsr.wdl" as s2b
import "subworkflows/qc_wgs.wdl" as qe

import "detect_variants_wgs.wdl" as dvw

import "tools/bam_to_cram.wdl" as btc
import "tools/concordance.wdl" as c
import "tools/index_cram.wdl" as ic
import "tools/manta_somatic.wdl" as ms
import "tools/cnvkit_batch.wdl" as cb

import "types.wdl"

Expand Down Expand Up @@ -35,7 +38,6 @@ workflow somaticWgs {
Array[LabelledFile] per_base_intervals
Array[LabelledFile] per_target_intervals
Array[LabelledFile] summary_intervals
File qc_intervals

File omni_vcf
File omni_vcf_tbi
Expand Down Expand Up @@ -68,7 +70,7 @@ workflow somaticWgs {
String vep_ensembl_species
File? synonyms_file
Boolean? annotate_coding_only
String vep_pick # enum ["pick", "flag_pick", "pick_allele", "per_gene", "pick_allele_gene", "flag_pick_allele", "flag_pick_allele_gene"]
String? vep_pick # enum ["pick", "flag_pick", "pick_allele", "per_gene", "pick_allele_gene", "flag_pick_allele", "flag_pick_allele_gene"]
Boolean cle_vcf_filter = false
Array[String] variants_to_table_fields = ["CHROM", "POS", "ID", "REF", "ALT", "set", "AC", "AF"]
Array[String] variants_to_table_genotype_fields = ["GT", "AD"]
Expand All @@ -81,12 +83,14 @@ workflow somaticWgs {
String normal_sample_name
File? validated_variants
File? validated_variants_tbi

#approximate size of split target bins for CNVkit; if not set a suitable window size will be set by CNVkit automatically
Int? cnvkit_target_average_size
}

call aw.alignmentWgs as tumorAlignmentAndQc {

call s2b.sequenceToBqsr as tumorAlignment {
input:
sample_name=tumor_name,
sequence=tumor_sequence,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
Expand All @@ -96,24 +100,32 @@ workflow somaticWgs {
reference_bwt=reference_bwt,
reference_pac=reference_pac,
reference_0123=reference_0123,
unaligned=tumor_sequence,
trimming=trimming,
omni_vcf=omni_vcf,
omni_vcf_tbi=omni_vcf_tbi,
intervals=qc_intervals,
picard_metric_accumulation_level=picard_metric_accumulation_level,
bqsr_known_sites=bqsr_known_sites,
bqsr_known_sites_tbi=bqsr_known_sites_tbi,
minimum_mapping_quality=qc_minimum_mapping_quality,
minimum_base_quality=qc_minimum_base_quality,
final_name=tumor_name
}
call qe.qcWgs as tumorQc {
input:
bam=tumorAlignment.final_bam,
bam_bai=tumorAlignment.final_bam_bai,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
per_base_intervals=per_base_intervals,
per_target_intervals=per_target_intervals,
summary_intervals=summary_intervals
intervals=target_intervals,
summary_intervals=summary_intervals,
omni_vcf=omni_vcf,
omni_vcf_tbi=omni_vcf_tbi,
picard_metric_accumulation_level=picard_metric_accumulation_level,
minimum_mapping_quality=qc_minimum_mapping_quality,
minimum_base_quality=qc_minimum_base_quality
}

call aw.alignmentWgs as normalAlignmentAndQc {
call s2b.sequenceToBqsr as normalAlignment {
input:
sample_name=normal_name,
sequence=normal_sequence,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
Expand All @@ -123,29 +135,40 @@ workflow somaticWgs {
reference_bwt=reference_bwt,
reference_pac=reference_pac,
reference_0123=reference_0123,
unaligned=normal_sequence,
trimming=trimming,
omni_vcf=omni_vcf,
omni_vcf_tbi=omni_vcf_tbi,
intervals=qc_intervals,
picard_metric_accumulation_level=picard_metric_accumulation_level,
bqsr_known_sites=bqsr_known_sites,
bqsr_known_sites_tbi=bqsr_known_sites_tbi,
minimum_mapping_quality=qc_minimum_mapping_quality,
minimum_base_quality=qc_minimum_base_quality,
final_name=normal_name
}

call qe.qcWgs as normalQc {
input:
bam=normalAlignment.final_bam,
bam_bai=normalAlignment.final_bam_bai,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
per_base_intervals=per_base_intervals,
per_target_intervals=per_target_intervals,
summary_intervals=summary_intervals
intervals=target_intervals,
summary_intervals=summary_intervals,
omni_vcf=omni_vcf,
omni_vcf_tbi=omni_vcf_tbi,
picard_metric_accumulation_level=picard_metric_accumulation_level,
minimum_mapping_quality=qc_minimum_mapping_quality,
minimum_base_quality=qc_minimum_base_quality
}

call c.concordance {
input:
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
bam_1=tumorAlignmentAndQc.bam,
bam_1_bai=tumorAlignmentAndQc.bam_bai,
bam_2=normalAlignmentAndQc.bam,
bam_2_bai=normalAlignmentAndQc.bam_bai,
bam_1=tumorAlignment.final_bam,
bam_1_bai=tumorAlignment.final_bam_bai,
bam_2=normalAlignment.final_bam,
bam_2_bai=normalAlignment.final_bam_bai,
vcf=somalier_vcf
}

Expand All @@ -154,10 +177,10 @@ workflow somaticWgs {
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
tumor_bam=tumorAlignmentAndQc.bam,
tumor_bam_bai=tumorAlignmentAndQc.bam_bai,
normal_bam=normalAlignmentAndQc.bam,
normal_bam_bai=normalAlignmentAndQc.bam_bai,
tumor_bam=tumorAlignment.final_bam,
tumor_bam_bai=tumorAlignment.final_bam_bai,
normal_bam=normalAlignment.final_bam,
normal_bam_bai=normalAlignment.final_bam_bai,
roi_intervals=target_intervals,
strelka_exome_mode=false,
strelka_cpu_reserved=strelka_cpu_reserved,
Expand Down Expand Up @@ -195,20 +218,31 @@ workflow somaticWgs {

call ms.mantaSomatic as manta {
input:
normal_bam=normalAlignmentAndQc.bam,
normal_bam_bai=normalAlignmentAndQc.bam_bai,
tumor_bam=tumorAlignmentAndQc.bam,
tumor_bam_bai=tumorAlignmentAndQc.bam_bai,
normal_bam=normalAlignment.final_bam,
normal_bam_bai=normalAlignment.final_bam_bai,
tumor_bam=tumorAlignment.final_bam,
tumor_bam_bai=tumorAlignment.final_bam_bai,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict,
non_wgs=manta_non_wgs,
output_contigs=manta_output_contigs
}

call cb.cnvkitBatch as cnvkit {
input:
tumor_bam=tumorAlignment.final_bam,
tumor_bam_bai=tumorAlignment.final_bam_bai,
normal_bam=normalAlignment.final_bam,
normal_bam_bai=normalAlignment.final_bam_bai,
reference_fasta=reference,
target_average_size=cnvkit_target_average_size,
method="wgs"
}

call btc.bamToCram as tumorBamToCram {
input:
bam=tumorAlignmentAndQc.bam,
bam=tumorAlignment.final_bam,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict
Expand All @@ -220,7 +254,7 @@ workflow somaticWgs {

call btc.bamToCram as normalBamToCram {
input:
bam=normalAlignmentAndQc.bam,
bam=normalAlignment.final_bam,
reference=reference,
reference_fai=reference_fai,
reference_dict=reference_dict
Expand All @@ -233,40 +267,40 @@ workflow somaticWgs {

output {
File tumor_cram = tumorIndexCram.indexed_cram
File tumor_mark_duplicates_metrics = tumorAlignmentAndQc.mark_duplicates_metrics
File tumor_insert_size_metrics = tumorAlignmentAndQc.insert_size_metrics
File tumor_alignment_summary_metrics = tumorAlignmentAndQc.alignment_summary_metrics
Array[File] tumor_per_target_coverage_metrics = tumorAlignmentAndQc.per_target_coverage_metrics
Array[File] tumor_per_target_hs_metrics = tumorAlignmentAndQc.per_target_hs_metrics
Array[File] tumor_per_base_coverage_metrics = tumorAlignmentAndQc.per_base_coverage_metrics
Array[File] tumor_per_base_hs_metrics = tumorAlignmentAndQc.per_base_hs_metrics
Array[File] tumor_summary_hs_metrics = tumorAlignmentAndQc.summary_hs_metrics
File tumor_flagstats = tumorAlignmentAndQc.flagstats
File tumor_verify_bam_id_metrics = tumorAlignmentAndQc.verify_bam_id_metrics
File tumor_verify_bam_id_depth = tumorAlignmentAndQc.verify_bam_id_depth
File tumor_insert_size_histogram = tumorAlignmentAndQc.insert_size_histogram
File tumor_gc_bias_metrics = tumorAlignmentAndQc.gc_bias_metrics
File tumor_gc_bias_metrics_chart = tumorAlignmentAndQc.gc_bias_metrics_chart
File tumor_gc_bias_metrics_summary = tumorAlignmentAndQc.gc_bias_metrics_summary
File tumor_wgs_metrics = tumorAlignmentAndQc.wgs_metrics
File tumor_mark_duplicates_metrics = tumorAlignment.mark_duplicates_metrics_file
File tumor_insert_size_metrics = tumorQc.insert_size_metrics
File tumor_alignment_summary_metrics = tumorQc.alignment_summary_metrics
Array[File] tumor_per_target_coverage_metrics = tumorQc.per_target_coverage_metrics
Array[File] tumor_per_target_hs_metrics = tumorQc.per_target_hs_metrics
Array[File] tumor_per_base_coverage_metrics = tumorQc.per_base_coverage_metrics
Array[File] tumor_per_base_hs_metrics = tumorQc.per_base_hs_metrics
Array[File] tumor_summary_hs_metrics = tumorQc.summary_hs_metrics
File tumor_flagstats = tumorQc.flagstats
File tumor_verify_bam_id_metrics = tumorQc.verify_bam_id_metrics
File tumor_verify_bam_id_depth = tumorQc.verify_bam_id_depth
File tumor_insert_size_histogram = tumorQc.insert_size_histogram
File tumor_gc_bias_metrics = tumorQc.gc_bias_metrics
File tumor_gc_bias_metrics_chart = tumorQc.gc_bias_metrics_chart
File tumor_gc_bias_metrics_summary = tumorQc.gc_bias_metrics_summary
File tumor_wgs_metrics = tumorQc.wgs_metrics
##normal alignment and qc
File normal_cram = normalIndexCram.indexed_cram
File normal_mark_duplicates_metrics = normalAlignmentAndQc.mark_duplicates_metrics
File normal_insert_size_metrics = normalAlignmentAndQc.insert_size_metrics
File normal_alignment_summary_metrics = normalAlignmentAndQc.alignment_summary_metrics
Array[File] normal_per_target_coverage_metrics = normalAlignmentAndQc.per_target_coverage_metrics
Array[File] normal_per_target_hs_metrics = normalAlignmentAndQc.per_target_hs_metrics
Array[File] normal_per_base_coverage_metrics = normalAlignmentAndQc.per_base_coverage_metrics
Array[File] normal_per_base_hs_metrics = normalAlignmentAndQc.per_base_hs_metrics
Array[File] normal_summary_hs_metrics = normalAlignmentAndQc.summary_hs_metrics
File normal_flagstats = normalAlignmentAndQc.flagstats
File normal_verify_bam_id_metrics = normalAlignmentAndQc.verify_bam_id_metrics
File normal_verify_bam_id_depth = normalAlignmentAndQc.verify_bam_id_depth
File normal_insert_size_histogram = normalAlignmentAndQc.insert_size_histogram
File normal_gc_bias_metrics = normalAlignmentAndQc.gc_bias_metrics
File normal_gc_bias_metrics_chart = normalAlignmentAndQc.gc_bias_metrics_chart
File normal_gc_bias_metrics_summary = normalAlignmentAndQc.gc_bias_metrics_summary
File normal_wgs_metrics = normalAlignmentAndQc.wgs_metrics
File normal_mark_duplicates_metrics = normalAlignment.mark_duplicates_metrics_file
File normal_insert_size_metrics = normalQc.insert_size_metrics
File normal_alignment_summary_metrics = normalQc.alignment_summary_metrics
Array[File] normal_per_target_coverage_metrics = normalQc.per_target_coverage_metrics
Array[File] normal_per_target_hs_metrics = normalQc.per_target_hs_metrics
Array[File] normal_per_base_coverage_metrics = normalQc.per_base_coverage_metrics
Array[File] normal_per_base_hs_metrics = normalQc.per_base_hs_metrics
Array[File] normal_summary_hs_metrics = normalQc.summary_hs_metrics
File normal_flagstats = normalQc.flagstats
File normal_verify_bam_id_metrics = normalQc.verify_bam_id_metrics
File normal_verify_bam_id_depth = normalQc.verify_bam_id_depth
File normal_insert_size_histogram = normalQc.insert_size_histogram
File normal_gc_bias_metrics = normalQc.gc_bias_metrics
File normal_gc_bias_metrics_chart = normalQc.gc_bias_metrics_chart
File normal_gc_bias_metrics_summary = normalQc.gc_bias_metrics_summary
File normal_wgs_metrics = normalQc.wgs_metrics
##variant calling
File mutect_unfiltered_vcf = detectVariants.mutect_unfiltered_vcf
File mutect_unfiltered_vcf_tbi = detectVariants.mutect_unfiltered_vcf_tbi
Expand All @@ -292,6 +326,7 @@ workflow somaticWgs {
File tumor_indel_bam_readcount_tsv = detectVariants.tumor_indel_bam_readcount_tsv
File normal_snv_bam_readcount_tsv = detectVariants.normal_snv_bam_readcount_tsv
File normal_indel_bam_readcount_tsv = detectVariants.normal_indel_bam_readcount_tsv
#manta
File? diploid_variants = manta.diploid_variants
File? diploid_variants_tbi = manta.diploid_variants_tbi
File? somatic_variants = manta.somatic_variants
Expand All @@ -302,6 +337,19 @@ workflow somaticWgs {
File small_candidates_tbi = manta.small_candidates_tbi
File? tumor_only_variants = manta.tumor_only_variants
File? tumor_only_variants_tbi = manta.tumor_only_variants_tbi
#cnvkit
File? intervals_antitarget = cnvkit.intervals_antitarget
File? intervals_target = cnvkit.intervals_target
File? normal_antitarget_coverage = cnvkit.normal_antitarget_coverage
File? normal_target_coverage = cnvkit.normal_target_coverage
File? reference_coverage = cnvkit.reference_coverage
File tumor_antitarget_coverage = cnvkit.tumor_antitarget_coverage
File tumor_target_coverage = cnvkit.tumor_target_coverage
File tumor_bin_level_ratios = cnvkit.tumor_bin_level_ratios
File tumor_segmented_ratios = cnvkit.tumor_segmented_ratios
File? cn_diagram = cnvkit.cn_diagram
File? cn_scatter_plot = cnvkit.cn_scatter_plot

##sample concordance check
File somalier_concordance_metrics = concordance.somalier_pairs
File somalier_concordance_statistics = concordance.somalier_samples
Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/cnvkit_batch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ task cnvkitBatch {
cpu: 1
# We use a forked cnvkit so we can get access to root privileges
# which let us write files at /cromwell_root/
docker: "jackmaruska/cnvkit:0.9.7"
docker: "mgibio/cnvkit:0.9.9"
disks: "local-disk ~{size_needed_gb} HDD"
}

Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/cnvkit_vcf_export.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ task cnvkitVcfExport {
Int space_needed_gb = 10 + round(2*size([cns_file, cnr_file], "GB"))
runtime {
memory: "8GB"
docker: "etal/cnvkit:0.9.5"
docker: "mgibio/cnvkit:0.9.9"
disks: "local-disk ~{space_needed_gb} HDD"
}

Expand Down
2 changes: 1 addition & 1 deletion definitions/tools/mark_duplicates_and_sort.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ task markDuplicatesAndSort {
Int space_needed_gb = 10 + round(5*size(bam, "GB"))
#estimate 15M reads per Gb size of bam
#markdup is listed as 2Gb per 100M reads
Int mem_needed_gb = round(((size(bam, "GB")*15)/100)*2)+20
Int mem_needed_gb = round(((size(bam, "GB")*15)/100)*2)+32
runtime {
docker: "quay.io/biocontainers/sambamba:0.8.2--h98b6b92_2"
memory: "~{mem_needed_gb}GB"
Expand Down
11 changes: 7 additions & 4 deletions definitions/tools/vep.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ task vepTask {
Array[String] plugins
Boolean coding_only = false
Array[String] custom_args
# Require files are necessary to force a localization. The call itself uses them
# via the custom_args field, which is a string and won't localize its parts.
# Required files are necessary to force localization. The call itself uses them
# via the custom_args field, which is a string and won't localize its parts, but
# does need to be pointed to the right inputs dir after localization
Array[File] required_files # !UnusedDeclaration
Boolean everything = true
# one of [pick, flag_pick, pick_allele, per_gene, pick_allele_gene, flag_pick_allele, flag_pick_allele_gene]
Expand All @@ -42,6 +43,9 @@ task vepTask {

command <<<
mkdir ~{cache_dir} && unzip -qq ~{cache_dir_zip} -d ~{cache_dir}
#custom vep inputs (required files) get localized and we have to define this variable
#pointing to their current path so that the custom string works as expected
custom_inputs_dir=$(dirname ~{required_files[0]})

/usr/bin/perl -I /opt/lib/perl/VEP/Plugins /usr/bin/variant_effect_predictor.pl \
--format vcf \
Expand Down Expand Up @@ -84,14 +88,13 @@ task parseVepCustomAnnotationIntoArg {
python <<CODE
check_existing = "~{true="--check_existing" false="" obj.annotation.check_existing}"
custom = ",".join([
"~{obj.annotation.file}",
"$" + "custom_inputs_dir/~{basename(obj.annotation.file)}",
"~{obj.annotation.name}",
"~{obj.annotation.data_format}",
"~{obj.method}",
"~{true=1 false=0 obj.force_report_coordinates}",
"~{sep="," obj.annotation.vcf_fields}"
])
print(f"{check_existing} --custom {custom}")
CODE
>>>
Expand Down

0 comments on commit 493191d

Please sign in to comment.