Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intuitive file/input names #73

Merged
merged 7 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions definitions/immuno.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -377,14 +377,14 @@ workflow immuno {

call eha.extractHlaAlleles as extractAlleles {
input:
file=germlineExome.optitype_tsv,
optitype_file=germlineExome.optitype_tsv,
phlat_file=germlineExome.phlat_summary
}

call hc.hlaConsensus {
input:
hla_source_mode=hla_source_mode,
optitype_hla_alleles=extractAlleles.allele_string,
hla_alleles=extractAlleles.allele_string,
clinical_mhc_classI_alleles=clinical_mhc_classI_alleles,
clinical_mhc_classII_alleles=clinical_mhc_classII_alleles
}
Expand Down Expand Up @@ -639,8 +639,8 @@ workflow immuno {
combined: pvacfuse.combined
}

File annotated_vcf = pvacseq.annotated_vcf
File annotated_tsv = pvacseq.annotated_tsv
File pvacseq_annotated_expression_vcf_gz = pvacseq.annotated_vcf
File pvacseq_annotated_variants_tsv = pvacseq.annotated_tsv

Array[File] fusioninspector_evidence = rna.fusioninspector_evidence
}
Expand Down
12 changes: 6 additions & 6 deletions definitions/tools/extract_hla_alleles.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ version 1.0

task extractHlaAlleles {
input {
File file
File optitype_file
File phlat_file
}

Int space_needed_gb = 10 + round(size(file, "GB"))
Int space_needed_gb = 10 + round(size(optitype_file, "GB"))
runtime {
memory: "2GB"
docker: "ubuntu:xenial"
Expand All @@ -17,10 +17,10 @@ task extractHlaAlleles {
# second, extract HLA class II from the phlat file
# third, ensure there are only 2 fields of accuracy for alleles

String outname = "helper.txt"
String outname = "hla_calls_newline.txt"
String temp = "temp.txt"
command <<<
/usr/bin/awk '{FS="\t";getline;for(n=2;n<=NF-2;n++){if($n==""){}else{printf "HLA-"$n"\n"}}}' ~{file} > ~{temp}
/usr/bin/awk '{FS="\t";getline;for(n=2;n<=NF-2;n++){if($n==""){}else{printf "HLA-"$n"\n"}}}' ~{optitype_file} > ~{temp}
grep "HLA_D" ~{phlat_file} | /usr/bin/awk '{FS="\t";if($2==""){}else{printf $2"\n"};if($3==""){}else{printf $3"\n"}}' >> ~{temp}
/usr/bin/awk -F":" '{print $1 ":" $2}' ~{temp} > ~{outname}
>>>
Expand All @@ -33,12 +33,12 @@ task extractHlaAlleles {

workflow wf {
input {
File file
File optitype_file
File phlat_file
}
call extractHlaAlleles {
input:
file=file,
optitype_file=optitype_file,
phlat_file=phlat_file
}
}
25 changes: 14 additions & 11 deletions definitions/tools/hla_consensus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version 1.0

task hlaConsensus {
input {
Array[String] optitype_hla_alleles
Array[String] hla_alleles
Array[String]? clinical_mhc_classI_alleles
Array[String]? clinical_mhc_classII_alleles
String hla_source_mode # enum [consensus, clinical_only]
Expand All @@ -18,15 +18,16 @@ task hlaConsensus {
python -c '
#This script produces 2-4 files depending on inputs and their contents
#All are packaged together into a folder called hla_calls for convenience
#optitype_calls.txt is always produced, and is essentially a copy of optitypes output
#hla_calls.txt is always produced, and is essentially a copy of optitype and phlat output
#consensus_calls.txt is also always produced; if no clinical calls are provided, this
#file is identical to optitype_calls.txt. If clinical calls are provided, they are
#reproduced in clinical_calls.txt. If the clinical calls exactly match the optitype calls*,
#file is identical to hla_calls.txt. If clinical calls are provided, they are
#reproduced in clinical_calls.txt. If the clinical calls exactly match the hla calls*,
#all 3 files described so far will contain the same information, but are not guaranteed to
#be exactly the same (text ordering may differ, depending on the order calls are given in the input).
#If the clinical calls and optitype calls do not match, mismatched_calls.txt is then produced;
#If the clinical calls and hla calls do not match, mismatched_calls.txt is then produced;
#each line represents a gene. See below (section "write out call files") for more mismatch details.
#NOTE: optitype only produces MHC class I calls
#NOTE: optitype only produces MHC class I calls, and while PHLAT produces both MHC class I and II,
#only class II is actually passed in

#optitype input format (should be automatic):
#HLA-X*01:02
Expand Down Expand Up @@ -91,7 +92,7 @@ task hlaConsensus {
clinical_exists = ~{clinical_exists}
if (hla_source_mode == "clinical_only") and not clinical_exists:
sys.exit("HLA consensus error: No clinical calls found, but hla_source_mode is set to clinical_only")
optitype_calls = ["~{sep="\",\"" optitype_hla_alleles}"]
optitype_calls = ["~{sep="\",\"" hla_alleles}"]

if clinical_exists:
#MHC Class I clinical typing results
Expand Down Expand Up @@ -182,7 +183,9 @@ task hlaConsensus {

#Create an exact copy of optitype calls, to be bundled with other relevant
#files for convenience/later review. Always generated.
with open("hla_calls/optitype_calls.txt", "w") as o_c:
#NOTE(Layth): this hla_consensus step actually gets passed results from
#Optitype for class I and PHLAT for class II. hla_calls will include both results.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be nice to follow up on this tool and fix some of the existing comments (and the variable name?) to reflect its true contents.

with open("hla_calls/hla_calls.txt", "w") as o_c:
o_c.write( ",".join(optitype_calls) )

#Create an exact copy of clinical calls, if they exist, to be bundled with
Expand All @@ -196,7 +199,7 @@ task hlaConsensus {
#########################################################

#A consensus file is always generated to be passed on to pvacseq. If there are
#no clinical calls, this file is the same as optitype_calls.txt. If clinical calls exist
#no clinical calls, this file is the same as hla_calls.txt. If clinical calls exist
#and $hla_source_mode is set to clinical_only, this file is the same as clinical_calls.txt
#Otherwise, if clinical calls exist and $hla_source_mode is set to consensus, walk
#through the tree and emit everything present as the consensus. If there is a true
Expand Down Expand Up @@ -264,14 +267,14 @@ task hlaConsensus {

workflow wf {
input {
Array[String] optitype_hla_alleles
Array[String] hla_alleles
Array[String]? clinical_mhc_classI_alleles
Array[String]? clinical_mhc_classII_alleles
}

call hlaConsensus {
input:
optitype_hla_alleles=optitype_hla_alleles,
hla_alleles=hla_alleles,
clinical_mhc_classI_alleles=clinical_mhc_classI_alleles,
clinical_mhc_classII_alleles=clinical_mhc_classII_alleles
}
Expand Down