-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patha06_genes_solved.py
167 lines (135 loc) · 6.3 KB
/
a06_genes_solved.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
######################################################################
# Author: Scott Heggen & Emily Lovell TODO: Change this to your names
# Username: heggens & lovelle TODO: Change this to your usernames
#
# Assignment: A06: It's in your Genes
#
# Purpose: Determine an amino acid sequence given an input DNA sequence
#
######################################################################
# Acknowledgements:
# Original Author: Dr. Jan Pearce
#
# Idea from: http://www.cs.uni.edu/~schafer/1140/assignments/pa9/index.htm
#
# licensed under a Creative Commons
# Attribution-Noncommercial-Share Alike 3.0 United States License.
####################################################################################
def is_nucleotide(sequence):
    """
    Check that a sequence consists only of the 4 nucleotides A, C, G, and T.

    The comparison is case-sensitive (lowercase letters are rejected), and an
    empty sequence is considered valid because it contains no invalid letters.

    :param sequence: a DNA sequence
    :return: True if every letter is one of A, C, G, T; False otherwise
    """
    valid_nucleotides = ("A", "C", "G", "T")
    # all() short-circuits on the first invalid letter.
    return all(letter in valid_nucleotides for letter in sequence)
def complement_strand(sequence):
    """
    Return the second (complementary) strand of a DNA sequence.

    Ts complement As and Cs complement Gs. If the sequence contains any
    letter other than A, C, G, or T, the string "Sequencing Error" is
    returned instead of a partial complement.

    :param sequence: A DNA sequence
    :return: the complement string for the DNA sequence, or
             "Sequencing Error" on bad input
    """
    base_pairs = {"A": "T", "C": "G", "T": "A", "G": "C"}
    try:
        # A single join avoids repeated string concatenation; an unknown
        # letter surfaces as KeyError from the lookup.
        return "".join(base_pairs[letter] for letter in sequence)
    except KeyError:
        return "Sequencing Error"
def mRNA(sequence):
    """
    Replace each occurrence of the nucleotide T with the nucleotide U.

    All other characters are passed through unchanged.

    :param sequence: the DNA sequence (a string)
    :return: The same sequence with T's replaced by U's
    """
    return sequence.replace("T", "U")
def chunk_amino_acid(sequence):
    """
    Divide a sequence (typically the output of mRNA(sequence)) into
    consecutive substrings of length 3, ignoring any "extra" one or two
    characters left over at the far end.

    :param sequence: the sequence to split
    :return: A list where each element is a three-character substring,
             in order; empty if the sequence has fewer than 3 characters
    """
    # Trim the length down to a multiple of 3 so a trailing partial chunk
    # is never produced.
    usable_length = len(sequence) - len(sequence) % 3
    return [sequence[start:start + 3] for start in range(0, usable_length, 3)]
# Maps each three-letter mRNA sequence to its single-character amino acid
# code ("*" for the three entries UAA, UAG, UGA). Built once at import time
# rather than on every call to amino_acid_chunks().
_CODON_TABLE = {
    "GCA": "A", "GCC": "A", "GCG": "A", "GCU": "A",
    "AGA": "R", "AGG": "R", "CGA": "R", "CGC": "R", "CGG": "R", "CGU": "R",
    "GAC": "D", "GAU": "D",
    "AAC": "N", "AAU": "N",
    "UGC": "C", "UGU": "C",
    "GAA": "E", "GAG": "E",
    "CAA": "Q", "CAG": "Q",
    "GGA": "G", "GGC": "G", "GGU": "G", "GGG": "G",
    "CAC": "H", "CAU": "H",
    "AUA": "I", "AUC": "I", "AUU": "I",
    "UUA": "L", "UUG": "L", "CUA": "L", "CUC": "L", "CUG": "L", "CUU": "L",
    "AAA": "K", "AAG": "K",
    "AUG": "M",
    "UUC": "F", "UUU": "F",
    "CCA": "P", "CCC": "P", "CCG": "P", "CCU": "P",
    "AGC": "S", "AGU": "S", "UCA": "S", "UCC": "S", "UCG": "S", "UCU": "S",
    "ACA": "T", "ACC": "T", "ACG": "T", "ACU": "T",
    "UGG": "W",
    "UAC": "Y", "UAU": "Y",
    "GUA": "V", "GUC": "V", "GUG": "V", "GUU": "V",
    "UAA": "*", "UAG": "*", "UGA": "*",
}


def amino_acid_chunks(threecharseq):
    """
    Expects a three character string as a parameter and returns
    the corresponding single character amino acid.

    The lookup is case-sensitive and uses mRNA letters (A, C, G, U).

    :param threecharseq: a sequence of three characters
    :return: A string representing the amino acid for that sequence,
             or "?" if the input is not a recognized sequence
    """
    # dict.get performs the membership test and the lookup in one step.
    return _CODON_TABLE.get(threecharseq, "?")
def sequence_gene(sequence):
    """
    Translate a sequence of nucleotides (A, C, G, and T) into the
    corresponding amino acid sequence.

    The input is validated, complemented, converted to mRNA, split into
    three-character chunks, and each chunk is translated in turn.

    :param sequence: a string representing a sequence of nucleotides
    :return: a string representing the amino acid sequence; an empty
             string for any illegal input
    """
    # Guard clause: anything that is not a valid sequence yields "".
    if not is_nucleotide(sequence):
        return ""

    transcript = mRNA(complement_strand(sequence))  # mRNA of the complement strand
    codons = chunk_amino_acid(transcript)           # three-character chunks
    return "".join(amino_acid_chunks(codon) for codon in codons)
def main():
    """
    Prompt for a DNA sequence, upper-case it, and print the amino acid
    sequence it produces.

    :return: None
    """
    prompt = "Please enter a valid gene sequence to convert to an amino acid: \n"
    dna = input(prompt).upper()  # upper-case once and reuse for both outputs
    amino_acids = sequence_gene(dna)
    report = "The input sequence {0} produces the amino acid {1}"
    print(report.format(dna, amino_acids))
# Run the interactive program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()