-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathget_CDS_genbank.py
executable file
·31 lines (26 loc) · 1.01 KB
/
get_CDS_genbank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env python3
# coding: utf-8
import argparse
import os

from Bio import SeqIO
# Command-line interface: zero or more GenBank file paths as positionals.
parser = argparse.ArgumentParser(
    description="This script extracts CDS sequences from genbank files",
)
parser.add_argument(
    "genbank",
    nargs="*",
    type=str,
    metavar="genbank",
    help="Genbank file(s)",
)
# Parsed at module level; the rest of the script reads the global `args`.
args = parser.parse_args()
def write_CDSs_to_file():
    """Write the CDS sequences of each input GenBank file to a FASTA file.

    For every path in the module-level ``args.genbank`` the FASTA entries
    returned by ``extract_CDS`` are written to a file named after the input
    with its extension replaced by ``.CDS.fa``.
    """
    for genbank_path in args.genbank:
        # splitext only strips an extension from the last path component, so
        # dots in directory parts ("../sample", "./run.1/sample") are kept.
        output_path = "{}.CDS.fa".format(os.path.splitext(genbank_path)[0])
        # Parse before opening the output so a parsing failure does not
        # leave behind (or truncate) the output file.
        coding_list = extract_CDS(genbank_path)
        with open(output_path, "w") as CDS:
            CDS.writelines(coding_list)
def extract_CDS(genbank):
    """Return the CDS features of a GenBank file as FASTA-formatted strings.

    Args:
        genbank: Source passed straight to ``SeqIO.parse`` with the
            "genbank" format.

    Returns:
        A list with one string per CDS feature, in file order. Each string
        is a ``>header`` line followed by the nucleotide sequence line. The
        header is the first value of the feature's ``gene`` qualifier; if
        that is missing, ``locus_tag`` and then ``protein_id`` are tried,
        and as a last resort a ``<record id>_CDS_<n>`` label is used.
    """
    CDS_list = []
    for record in SeqIO.parse(genbank, "genbank"):
        cds_index = 0
        for feature in record.features:
            if feature.type != "CDS":
                continue
            cds_index += 1
            # Not every CDS carries a /gene qualifier; indexing the result
            # of .get("gene") directly would fail on None in that case.
            for key in ("gene", "locus_tag", "protein_id"):
                values = feature.qualifiers.get(key)
                if values:
                    header = values[0]
                    break
            else:
                header = "{}_CDS_{}".format(record.id, cds_index)
            # Extract from the sequence itself rather than slicing the
            # whole record just to read its .seq afterwards.
            sequence = feature.location.extract(record.seq)
            CDS_list.append(">{}\n{}\n".format(header, sequence))
    return CDS_list
if __name__ == "__main__":
    # Process every file named on the command line.
    write_CDSs_to_file()
    # With no input files given, show the usage text.
    if not args.genbank:
        parser.print_help()