forked from mikerobeson/make_SILVA_db
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_rna_to_dna.py
85 lines (73 loc) · 3.01 KB
/
convert_rna_to_dna.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#! /usr/bin/env python
# This script will simply re-write FASTA file from RNA to DNA.
# Will convert Us to Ts. Optionally remove the description text.
# Optionally convert all '.' to '-' for software compatibility purposes.
# That is, this
# >seq1 H. Sapiens
# ACCGGUUGGCCGUUCAGGGUACAGGUUGGCCGUUCAGGGUAA
# will be output as:
# >seq1
# ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA
from skbio.io import read
import string
import argparse
from argparse import RawTextHelpFormatter
def make_trans_table(convg=False):
    """Build the ``str.translate`` table that rewrites RNA text as DNA.

    'U' is always mapped to 'T'. When ``convg`` is true, '.' is also
    mapped to '-'. Every character in ``string.whitespace`` is deleted
    by the returned table.
    """
    source_chars, target_chars = ('U.', 'T-') if convg else ('U', 'T')
    return str.maketrans(source_chars, target_chars, string.whitespace)
def parse_seqs(fasta_ifh, fasta_ofh, convg=False, desc=False):
    """Write each record of ``fasta_ifh`` to ``fasta_ofh`` as a DNA FASTA entry.

    Parameters
    ----------
    fasta_ifh : iterable
        Sequence records. Each record must support ``str(record)`` and
        expose ``record.metadata['id']``; ``record.metadata['description']``
        is also read when ``desc`` is true.
    fasta_ofh : object
        Anything with a ``write(str)`` method; one two-line FASTA entry is
        written per record.
    convg : bool
        Passed to ``make_trans_table``; when true '.' becomes '-'.
    desc : bool
        When true the description is appended to the header after a
        single space; otherwise only the id is written.
    """
    tt = make_trans_table(convg=convg)
    for seq in fasta_ifh:
        header = '>' + seq.metadata['id']
        if desc:
            description = seq.metadata['description']
            # Skip an empty description so the header does not end in a
            # dangling space (">id ").
            if description:
                header += ' ' + description
        fasta_ofh.write(header + '\n' + str(seq).translate(tt) + '\n')
def main():
    """Command-line entry point: parse arguments and convert the FASTA file.

    Reads the file named by ``-i/--input_fasta`` with ``skbio.io.read``
    (format ``'fasta'``), and writes the converted records to the file named
    by ``-o/--output_fasta``. ``-d`` keeps the header description and ``-g``
    converts '.' to '-'.
    """
    parser = argparse.ArgumentParser(
        description='This script will simply re-write FASTA files '
                    'without the description. \nWill also convert all Us '
                    'to Ts and optionally convert "." to "-".\n'
                    'That is, this: \n'
                    '\t>seq1 H. Sapiens\n'
                    '\tACCGGUUGGCCGUUCAGGGUACAGGUUGGCCGUUCAGGGUAA\n'
                    'will be output as:\n'
                    '\t>seq1\n'
                    '\tACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA\n'
                    'Expected to be used with SILVA FASTA files.',
        formatter_class=RawTextHelpFormatter)

    req = parser.add_argument_group('REQUIRED')
    req.add_argument('-i', '--input_fasta', required=True, action='store',
                     help='Input fasta file.')
    req.add_argument('-o', '--output_fasta', required=True, action='store',
                     help='Output fasta file.')

    optp = parser.add_argument_group('OPTIONAL')
    optp.add_argument('-d', '--include_description', action='store_true',
                      help='Boolean. Keep the additional FASTA header '
                           'description text.[Default: False]')
    optp.add_argument('-g', '--convert_to_gap', action='store_true',
                      help='Boolean. Convert "." to "-". [Default: False]')

    p = parser.parse_args()

    input_fasta = read(p.input_fasta, format='fasta')
    try:
        # The context manager closes the output file even if conversion
        # fails part-way through.
        with open(p.output_fasta, 'w') as output_fasta:
            parse_seqs(input_fasta, output_fasta,
                       convg=p.convert_to_gap,
                       desc=p.include_description)
    finally:
        # Always release the reader, matching the explicit close() the
        # script performed on the success path.
        input_fasta.close()
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()