Skip to content

Commit

Permalink
Fix output directory for genomic references and better precondition checks
Browse files Browse the repository at this point in the history
  • Loading branch information
arteymix committed Jul 7, 2022
1 parent aa78680 commit eec6095
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
12 changes: 8 additions & 4 deletions rnaseq_pipeline/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ class RsemReference(luigi.Target):
"""
Represents the target of rsem-prepare-reference script.
"""
def __init__(self, prefix, taxon):
self.prefix = prefix
def __init__(self, path, taxon):
self.path = path
self.taxon = taxon

@property
def prefix(self):
return join(self.path, '{}_0'.format(self.taxon))

def exists(self):
exts = ['grp', 'ti', 'seq', 'chrlist']
return all(exists(join(self.prefix, '{}_0.{}'.format(self.taxon, ext)))
exts = ['chrlist', 'grp', 'idx.fa', 'ng2.idx.fa', 'seq', 'ti', 'transcripts.fa']
return all(exists(self.prefix + '.' + ext)
for ext in exts)

class GemmaDatasetPlatform(luigi.Target):
Expand Down
10 changes: 6 additions & 4 deletions rnaseq_pipeline/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,11 @@ class PrepareReference(ScheduledExternalProgramTask):
def input(self):
genome_dir = join(cfg.OUTPUT_DIR, cfg.GENOMES, self.reference_id)
gtf_files = glob(join(genome_dir, '*.gtf'))
fasta_files = glob(join(genome_dir, '*.fn?a'))
fasta_files = glob(join(genome_dir, '*.f*a')) # FIXME: this pattern is too broad
if len(gtf_files) != 1:
raise ValueError('Only one GTF file is expected in {}.'.format(genome_dir))
raise ValueError('Exactly one GTF file is expected in {}.'.format(genome_dir))
if len(fasta_files) < 1:
raise ValueError('At least one FASTA (with .fa or .fna extension) file is expected in {}.'.format(genome_dir))
return [luigi.LocalTarget(gtf_files[0]),
[luigi.LocalTarget(f) for f in fasta_files]]

Expand All @@ -212,12 +214,12 @@ def program_args(self):

args.extend([t.path for t in genome_fasta])

args.append(join(self.output().prefix))
args.append(self.output().prefix)

return args

def run(self):
os.makedirs(self.output().prefix, exist_ok=True)
os.makedirs(self.output().path, exist_ok=True)
return super().run()

def output(self):
Expand Down

0 comments on commit eec6095

Please sign in to comment.