Summary of this patch (descriptive text before the first "diff --git" header):

  rnaseq_pipeline/targets.py
    * RsemReference.__init__ now takes `path` instead of `prefix`.
    * `prefix` becomes a read-only property: join(path, '<taxon>_0').
    * exists() checks prefix + '.' + ext for the extensions chrlist, grp,
      idx.fa, ng2.idx.fa, seq, ti and transcripts.fa.

  rnaseq_pipeline/tasks.py
    * PrepareReference.input() globs '*.f*a' instead of '*.fn?a' (flagged in
      the code as too broad), rewords the GTF error message, and raises
      ValueError when no FASTA file is found.
    * program_args() passes output().prefix directly (no one-argument join).
    * run() creates output().path rather than output().prefix.

diff --git a/rnaseq_pipeline/targets.py b/rnaseq_pipeline/targets.py
index 74027af5..b97543fc 100644
--- a/rnaseq_pipeline/targets.py
+++ b/rnaseq_pipeline/targets.py
@@ -10,13 +10,17 @@ class RsemReference(luigi.Target):
     """
     Represents the target of rsem-prepare-reference script.
     """
-    def __init__(self, prefix, taxon):
-        self.prefix = prefix
+    def __init__(self, path, taxon):
+        self.path = path
         self.taxon = taxon
 
+    @property
+    def prefix(self):
+        return join(self.path, '{}_0'.format(self.taxon))
+
     def exists(self):
-        exts = ['grp', 'ti', 'seq', 'chrlist']
-        return all(exists(join(self.prefix, '{}_0.{}'.format(self.taxon, ext)))
+        exts = ['chrlist', 'grp', 'idx.fa', 'ng2.idx.fa', 'seq', 'ti', 'transcripts.fa']
+        return all(exists(self.prefix + '.' + ext)
                    for ext in exts)
 
 class GemmaDatasetPlatform(luigi.Target):
diff --git a/rnaseq_pipeline/tasks.py b/rnaseq_pipeline/tasks.py
index e47b449e..3675d281 100755
--- a/rnaseq_pipeline/tasks.py
+++ b/rnaseq_pipeline/tasks.py
@@ -194,9 +194,11 @@ class PrepareReference(ScheduledExternalProgramTask):
     def input(self):
         genome_dir = join(cfg.OUTPUT_DIR, cfg.GENOMES, self.reference_id)
         gtf_files = glob(join(genome_dir, '*.gtf'))
-        fasta_files = glob(join(genome_dir, '*.fn?a'))
+        fasta_files = glob(join(genome_dir, '*.f*a')) # FIXME: this pattern is too broad
         if len(gtf_files) != 1:
-            raise ValueError('Only one GTF file is expected in {}.'.format(genome_dir))
+            raise ValueError('Exactly one GTF file is expected in {}.'.format(genome_dir))
+        if len(fasta_files) < 1:
+            raise ValueError('At least one FASTA (with .fa or .fna extension) file is expected in {}.'.format(genome_dir))
         return [luigi.LocalTarget(gtf_files[0]),
                 [luigi.LocalTarget(f) for f in fasta_files]]
 
@@ -212,12 +214,12 @@ def program_args(self):
 
         args.extend([t.path for t in genome_fasta])
 
-        args.append(join(self.output().prefix))
+        args.append(self.output().prefix)
 
         return args
 
     def run(self):
-        os.makedirs(self.output().prefix, exist_ok=True)
+        os.makedirs(self.output().path, exist_ok=True)
         return super().run()
 
     def output(self):