diff --git a/scripts/reverse_reversed_sequences.py b/scripts/reverse_reversed_sequences.py deleted file mode 100644 index f838a20d..00000000 --- a/scripts/reverse_reversed_sequences.py +++ /dev/null @@ -1,32 +0,0 @@ -import pandas as pd -import argparse -from Bio import SeqIO - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Reverse-complement reverse-complemented sequence", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--metadata', type=str, required=True, help="input metadata") - parser.add_argument('--sequences', type=str, required=True, help="input sequences") - parser.add_argument('--output', type=str, required=True, help="output sequences") - args = parser.parse_args() - - metadata = pd.read_csv(args.metadata, sep='\t') - - # Read in fasta file - with open(args.sequences, 'r') as f_in: - with open(args.output, 'w') as f_out: - for seq in SeqIO.parse(f_in, 'fasta'): - # Check if metadata['reverse'] is True - try: - if metadata.loc[metadata['strain'] == seq.id, 'reverse'].values[0] == True: - # Reverse-complement sequence - seq.seq = seq.seq.reverse_complement() - print("Reverse-complementing sequence:", seq.id) - except: - print("No reverse complement for:", seq.id) - - # Write sequences to file - SeqIO.write(seq, f_out, 'fasta') diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index 77299730..d80801d0 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -102,21 +102,6 @@ rule combine_samples: """ -rule separate_reverse_complement: - input: - metadata=build_dir + "/{build_name}/metadata.tsv", - sequences=build_dir + "/{build_name}/filtered.fasta", - output: - build_dir + "/{build_name}/reversed.fasta", - shell: - """ - python3 scripts/reverse_reversed_sequences.py \ - --metadata {input.metadata} \ - --sequences {input.sequences} \ - --output {output} - """ - - rule align: message: """ @@ -124,7 +109,7 @@ rule align: - filling gaps with N """ input: - sequences=rules.separate_reverse_complement.output, + sequences=rules.combine_samples.output.sequences, reference=config["reference"], genemap=config["genemap"], output: @@ -142,7 +127,6 @@ rule align: --genemap {input.genemap} \ --max-indel {params.max_indel} \ --seed-spacing {params.seed_spacing} \ - --retry-reverse-complement \ --output-fasta - \ --output-insertions {output.insertions} \ {input.sequences} | seqkit seq -i > {output.alignment}