Skip to content

Commit

Permalink
Merge pull request #407 from tanglingfung/master
Browse files Browse the repository at this point in the history
Oncofuse bug fixes.

This should be considered pre-alpha: we haven't been able to validate the calls yet, but this should get us started.

Thanks to @tanglingfung for all of his awesome work.
  • Loading branch information
roryk committed Apr 30, 2014
2 parents a2d10d6 + 3ae269e commit 8652a74
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 17 deletions.
4 changes: 3 additions & 1 deletion bcbio/pipeline/rnaseq.py
Original file line number Diff line number Diff line change
def generate_transcript_counts(data):
    """Generate counts per transcript from an alignment.

    Stores the featureCounts result under ``data["count_file"]``.  When
    ``config -> algorithm -> fusion_mode`` is enabled, Oncofuse is run as well
    and its output path is stored under ``data["oncofuse_file"]``; the key is
    left unset when Oncofuse returns nothing.

    Returns the sample wrapped as ``[[data]]``.
    """
    data["count_file"] = featureCounts.count(data)
    if get_in(data, ("config", "algorithm", "fusion_mode"), False):
        oncofuse_file = oncofuse.run(data)
        if oncofuse_file:
            # Store the result we already have instead of invoking
            # oncofuse.run() a second time.
            data["oncofuse_file"] = oncofuse_file
    return [[data]]


Expand Down
78 changes: 62 additions & 16 deletions bcbio/rnaseq/oncofuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,23 @@
def run(data):
#cmd line: java -Xmx1G -jar Oncofuse.jar input_file input_type tissue_type output_file
config = data["config"]
genome_build = data.get("genome_build", "")
input_type, input_dir, input_file = _get_input_para(data)
if genome_build == 'GRCh37': #assume genome_build is hg19 otherwise
if config["algorithm"].get("aligner") in ['star']:
input_file = _fix_star_junction_output(input_file)
if config["algorithm"].get("aligner") in ['tophat', 'tophat2']:
input_file = _fix_tophat_junction_output(input_file)

#handle cases when fusion file doesn't exist
if not file_exists(input_file):
return None

out_file = os.path.join(input_dir, 'oncofuse_out.txt')

if file_exists(out_file):
return out_file

oncofuse_jar = config_utils.get_jar("Oncofuse",
config_utils.get_program("oncofuse",
config, "dir"))
Expand All @@ -31,32 +46,70 @@ def run(data):
cl = ["java"]
cl += resources.get("jvm_opts", ["-Xms750m", "-Xmx5g"])
cl += ["-jar", oncofuse_jar, input_file, input_type, tissue_type, out_file]
with file_transaction(out_file) as tx_out_file:
with open(tx_out_file, "w") as out_handle:
cmd = " ".join(cl)
with open(out_file, "w") as out_handle:
cmd = " ".join(cl)
try:
do.run(cmd, "oncofuse fusion detection", data)
except:
return out_file
return out_file

def is_non_zero_file(fpath):
    """Return True when ``fpath`` is an existing regular file larger than 0 bytes.

    Missing paths, directories and empty files all yield False.
    """
    # The size is only queried once the path is known to be a regular file.
    return os.path.isfile(fpath) and os.path.getsize(fpath) > 0

def _get_input_para(data):

TOPHAT_FUSION_OUTFILE = "fusions.out"
STAR_FUSION_OUTFILE = 'Chimeric.out.junction'



config = data["config"]
aligner = config["algorithm"].get("aligner")
if aligner == 'tophat2':
aligner = 'tophat'
names = data["rgnames"]
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["sample"], names["sample"]+"_%s" % aligner)
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["lane"], names["sample"]+"_%s" % aligner)
if aligner in ['tophat', 'tophat2']:
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["sample"], names["sample"]+"_%s" % aligner)
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["lane"], names["sample"]+"_%s" % aligner)
return 'tophat', align_dir_parts, os.path.join(align_dir_parts, TOPHAT_FUSION_OUTFILE)
if aligner in ['star']:
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["sample"])
return 'rnastar', align_dir_parts, os.path.join(align_dir_parts,names["sample"]+STAR_FUSION_OUTFILE)
align_dir_parts = os.path.join(data["dirs"]["work"], "align", names["lane"])
return 'rnastar', align_dir_parts, os.path.join(align_dir_parts,names["lane"]+STAR_FUSION_OUTFILE)
return None

def _fix_tophat_junction_output(chimeric_out_junction_file):
#for fusion.out
out_file = chimeric_out_junction_file + '.hg19'
with open(out_file, "w") as out_handle:
with open(chimeric_out_junction_file, "r") as in_handle:
for line in in_handle:
parts = line.split("\t")
left, right = parts[0].split("-")
parts[0] = "%s-%s" % (_h37tohg19(left), _h37tohg19(right))
out_handle.write("\t".join(parts))
return out_file

def _fix_star_junction_output(chimeric_out_junction_file):
#for Chimeric.out.junction
out_file = chimeric_out_junction_file + '.hg19'
with open(out_file, "w") as out_handle:
with open(chimeric_out_junction_file, "r") as in_handle:
for line in in_handle:
parts = line.split("\t")
parts[0] = _h37tohg19(parts[0])
parts[3] = _h37tohg19(parts[3])
out_handle.write("\t".join(parts))
return out_file

def _h37tohg19(chromosome):
MAX_CHROMOSOMES = 23
if chromosome in [str(x) for x in range(1, MAX_CHROMOSOMES)] + ["X", "Y"]:
new_chrom = "chr%s" % chromosome
elif chromosome == "MT":
new_chrom = "chrM"
else:
raise NotImplementedError(chromosome)
return new_chrom


def _oncofuse_tissue_arg_from_config(data):
Expand All @@ -70,15 +123,8 @@ def _oncofuse_tissue_arg_from_config(data):
MES (mesenchymal origin) and
AVG (average expression, if tissue source is unknown).
"""
#potential check for tumor only analysis
#if data.get("metadata", {}).get("tissue") in ["tumor", "normal"]:
# pass
SUPPORTED_TIISUE_TYPE = ["EPI", "HEM", "MES", "AVG"]
if data.get("metadata", {}).get("tissue") in SUPPORTED_TIISUE_TYPE:
return data.get("metadata", {}).get("tissue")
else:
#may handle exception later
return 'AVG'



return 'AVG'
8 changes: 8 additions & 0 deletions bcbio/upload/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def _get_files_rnaseq(sample):
out = _maybe_add_alignment(algorithm, sample, out)
out = _maybe_add_counts(algorithm, sample, out)
out = _maybe_add_cufflinks(algorithm, sample, out)
out = _maybe_add_oncofuse(algorithm, sample, out)
return _add_meta(out, sample)

def _get_files_chipseq(sample):
Expand Down Expand Up @@ -160,6 +161,13 @@ def _maybe_add_counts(algorithm, sample, out):
"ext": "ready"})
return out

def _maybe_add_oncofuse(algorithm, sample, out):
if sample["oncofuse_file"] is not None:
out.append({"path": sample["oncofuse_file"],
"type": "oncofuse_outfile",
"ext": "ready"})
return out

def _maybe_add_cufflinks(algorithm, sample, out):
if "cufflinks_dir" in sample:
out.append({"path": sample["cufflinks_dir"],
Expand Down

0 comments on commit 8652a74

Please sign in to comment.