-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLibrary.cs
171 lines (162 loc) · 7.66 KB
/
Library.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
namespace Rosalind
{
/// <summary>
/// Library of common functions for use in Rosalind problems.
/// </summary>
internal class Library
{
    /// <summary>
    /// Parses FASTA format without sequence identifiers.
    /// </summary>
    /// <remarks>
    /// Every line starting with <c>&gt;</c> begins a new record; all following lines up to the
    /// next header are concatenated into that record's sequence. Null or empty lines are skipped.
    /// A header that is not followed by any sequence line yields an empty string.
    /// </remarks>
    /// <param name="inputFile">A string array in FASTA format.</param>
    /// <returns>An array of parsed genetic strings, one per header, in input order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputFile"/> is <c>null</c>.</exception>
    /// <exception cref="FormatException">Sequence data appears before the first header line.</exception>
    public static string[] ParseFastaToArray(string[] inputFile)
    {
        if (inputFile == null)
        {
            throw new ArgumentNullException(nameof(inputFile));
        }

        // One builder per record avoids quadratic string concatenation on long sequences.
        var sequences = new List<StringBuilder>();
        foreach (string line in inputFile)
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (line[0] == '>')
            {
                sequences.Add(new StringBuilder());
            }
            else if (sequences.Count == 0)
            {
                throw new FormatException("FASTA input contains sequence data before the first '>' header line.");
            }
            else
            {
                sequences[sequences.Count - 1].Append(line);
            }
        }

        return sequences.Select(builder => builder.ToString()).ToArray();
    }

    /// <summary>
    /// Parses FASTA format with sequence identifiers.
    /// </summary>
    /// <remarks>
    /// The identifier is the header line without its leading <c>&gt;</c>. Null or empty lines are skipped.
    /// </remarks>
    /// <param name="inputFile">A string array in FASTA format.</param>
    /// <returns>A dictionary of parsed genetic strings with identifiers as keys.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputFile"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The same identifier occurs in more than one header.</exception>
    /// <exception cref="FormatException">Sequence data appears before the first header line.</exception>
    public static Dictionary<string, string> ParseFastaToDictionary(string[] inputFile)
    {
        if (inputFile == null)
        {
            throw new ArgumentNullException(nameof(inputFile));
        }

        var dict = new Dictionary<string, string>();
        var sequence = new StringBuilder();
        string identifier = null;

        foreach (string line in inputFile)
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (line[0] == '>')
            {
                // Flush the record that just ended before starting the next one.
                if (identifier != null)
                {
                    dict[identifier] = sequence.ToString();
                }

                identifier = line.Substring(1);
                dict.Add(identifier, "");
                sequence.Clear();
            }
            else if (identifier == null)
            {
                throw new FormatException("FASTA input contains sequence data before the first '>' header line.");
            }
            else
            {
                sequence.Append(line);
            }
        }

        // Flush the final record, which has no header after it.
        if (identifier != null)
        {
            dict[identifier] = sequence.ToString();
        }

        return dict;
    }

    /// <summary>
    /// RNA codon table. Stop codons have the value <c>Stop</c>.
    /// </summary>
    public static readonly Dictionary<string, string> RnaCodonTable = new Dictionary<string, string>()
    {
        {"UUU", "F" }, {"UUC", "F" }, {"UUA", "L" }, {"UUG", "L" }, {"UCU", "S" }, {"UCC", "S" }, {"UCA", "S" }, {"UCG", "S" },
        {"UAU", "Y" }, {"UAC", "Y" }, {"UGU", "C" }, {"UGC", "C" }, {"UGG", "W" }, {"CUU", "L" }, {"CUC", "L" }, {"CUA", "L" }, {"CUG", "L" },
        {"CCU", "P" }, {"CCC", "P" }, {"CCA", "P" }, {"CCG", "P" }, {"CAU", "H" }, {"CAC", "H" }, {"CAA", "Q" }, {"CAG", "Q" },
        {"CGU", "R" }, {"CGC", "R" }, {"CGA", "R" }, {"CGG", "R" }, {"AUU", "I" }, {"AUC", "I" }, {"AUA", "I" }, {"AUG", "M" },
        {"ACU", "T" }, {"ACC", "T" }, {"ACA", "T" }, {"ACG", "T" }, {"AAU", "N" }, {"AAC", "N" }, {"AAA", "K" }, {"AAG", "K" },
        {"AGU", "S" }, {"AGC", "S" }, {"AGA", "R" }, {"AGG", "R" }, {"GUU", "V" }, {"GUC", "V" }, {"GUA", "V" }, {"GUG", "V" },
        {"GCU", "A" }, {"GCC", "A" }, {"GCA", "A" }, {"GCG", "A" }, {"GAU", "D" }, {"GAC", "D" }, {"GAA", "E" }, {"GAG", "E" },
        {"GGU", "G" }, {"GGC", "G" }, {"GGA", "G" }, {"GGG", "G" }, {"UAA", "Stop" }, {"UAG", "Stop" }, {"UGA", "Stop" }
    };

    /// <summary>
    /// DNA codon table. Stop codons have the value <c>Stop</c>.
    /// </summary>
    public static readonly Dictionary<string, string> DnaCodonTable = new Dictionary<string, string>()
    {
        {"TTT", "F" }, {"TTC", "F" }, {"TTA", "L" }, {"TTG", "L" }, {"TCT", "S" }, {"TCC", "S" }, {"TCA", "S" }, {"TCG", "S" },
        {"TAT", "Y" }, {"TAC", "Y" }, {"TGT", "C" }, {"TGC", "C" }, {"TGG", "W" }, {"CTT", "L" }, {"CTC", "L" }, {"CTA", "L" }, {"CTG", "L" },
        {"CCT", "P" }, {"CCC", "P" }, {"CCA", "P" }, {"CCG", "P" }, {"CAT", "H" }, {"CAC", "H" }, {"CAA", "Q" }, {"CAG", "Q" },
        {"CGT", "R" }, {"CGC", "R" }, {"CGA", "R" }, {"CGG", "R" }, {"ATT", "I" }, {"ATC", "I" }, {"ATA", "I" }, {"ATG", "M" },
        {"ACT", "T" }, {"ACC", "T" }, {"ACA", "T" }, {"ACG", "T" }, {"AAT", "N" }, {"AAC", "N" }, {"AAA", "K" }, {"AAG", "K" },
        {"AGT", "S" }, {"AGC", "S" }, {"AGA", "R" }, {"AGG", "R" }, {"GTT", "V" }, {"GTC", "V" }, {"GTA", "V" }, {"GTG", "V" },
        {"GCT", "A" }, {"GCC", "A" }, {"GCA", "A" }, {"GCG", "A" }, {"GAT", "D" }, {"GAC", "D" }, {"GAA", "E" }, {"GAG", "E" },
        {"GGT", "G" }, {"GGC", "G" }, {"GGA", "G" }, {"GGG", "G" }, {"TAA", "Stop" }, {"TAG", "Stop" }, {"TGA", "Stop" }
    };

    /// <summary>
    /// Downloads protein data from the UniProt database.
    /// </summary>
    /// <remarks>
    /// The response is written to <c>..\..\..\Datasets\uniprot_dw.txt</c> (relative to the current
    /// working directory), overwriting any previous download, and then read back line by line.
    /// The first line of the result is replaced with <c>&gt;</c> followed by <paramref name="uniprotID"/>.
    /// </remarks>
    /// <param name="uniprotID">The protein ID.</param>
    /// <returns>The lines of the downloaded FASTA record, with the header shortened to the ID.</returns>
    /// <exception cref="ArgumentException"><paramref name="uniprotID"/> is <c>null</c>, empty or whitespace.</exception>
    /// <exception cref="InvalidDataException">The downloaded file is empty.</exception>
    public static string[] UniprotDownload(string uniprotID)
    {
        if (string.IsNullOrWhiteSpace(uniprotID))
        {
            throw new ArgumentException("A UniProt ID is required.", nameof(uniprotID));
        }

        // Single definition of the cache location so the download and the read cannot drift apart.
        const string cachePath = @"..\..\..\Datasets\uniprot_dw.txt";

        // NOTE(review): plain-HTTP endpoint kept as in the original; confirm whether it should move to HTTPS.
        Uri url = new Uri("http://www.uniprot.org/uniprot/" + uniprotID + ".fasta");

        // NOTE(review): WebClient is flagged obsolete on newer .NET versions; kept to avoid new dependencies.
        using (WebClient client = new WebClient())
        {
            client.DownloadFile(url, cachePath);
        }

        string[] contents = File.ReadAllLines(cachePath);
        if (contents.Length == 0)
        {
            throw new InvalidDataException("UniProt returned an empty response for ID '" + uniprotID + "'.");
        }

        contents[0] = ">" + uniprotID;
        return contents;
    }

    /// <summary>
    /// Parses the monoisotopic mass table for amino acids.
    /// </summary>
    /// <remarks>
    /// Each non-blank line holds a one-letter amino acid code and its mass, separated by whitespace.
    /// The mass is parsed culture-independently; both <c>.</c> and <c>,</c> are accepted as the
    /// decimal separator. An extra <c>WATER</c> entry (18.01056) is appended to the result.
    /// </remarks>
    /// <param name="path">Filepath to the mass table.</param>
    /// <returns>A dictionary with parsed mass table values.</returns>
    /// <exception cref="FormatException">A line has no mass column or the mass is not a number.</exception>
    /// <exception cref="ArgumentException">The same amino acid code occurs on more than one line.</exception>
    public static Dictionary<string, decimal> ParseMassTable(string path = @"..\..\..\Datasets\monoisotopicMassTable.txt")
    {
        var dict = new Dictionary<string, decimal>();
        foreach (string line in File.ReadAllLines(path))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // Split on any run of whitespace instead of relying on a fixed column offset.
            string[] fields = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length < 2)
            {
                throw new FormatException("Mass table line '" + line + "' does not contain a mass value.");
            }

            // Keys stay single-letter codes, exactly as before.
            string protein = fields[0].Substring(0, 1);

            // Normalise to '.' and parse invariantly so the result does not depend on the machine's culture.
            decimal mass = decimal.Parse(fields[1].Replace(',', '.'), NumberStyles.Number, CultureInfo.InvariantCulture);
            dict.Add(protein, mass);
        }

        dict.Add("WATER", 18.01056m);
        return dict;
    }

    /// <summary>
    /// Computes the factorial of an <c>int</c> number.
    /// </summary>
    /// <param name="number">The number to factorialise. Must be between 0 and 12 inclusive.</param>
    /// <returns>The factorial of <c>number</c>; <c>0!</c> and <c>1!</c> are both 1.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="number"/> is negative.</exception>
    /// <exception cref="OverflowException">The result does not fit in an <c>int</c> (<paramref name="number"/> above 12).</exception>
    public static int IntFactorial(int number)
    {
        if (number < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(number), number, "Factorial is not defined for negative numbers.");
        }

        // Starting from 1 (the empty product) makes 0! and 1! come out right without special cases.
        int factorial = 1;
        for (int i = 2; i <= number; i++)
        {
            // checked: fail loudly instead of silently wrapping around for 13! and above.
            factorial = checked(factorial * i);
        }

        return factorial;
    }

    /// <summary>
    /// Computes the reverse complement of a DNA sequence.
    /// </summary>
    /// <remarks>
    /// Only upper-case <c>A</c>, <c>T</c>, <c>C</c> and <c>G</c> are complemented; any other
    /// character is kept as-is at its reversed position.
    /// </remarks>
    /// <param name="dnaSequence">A DNA sequence.</param>
    /// <returns>The reverse complement of a DNA sequence.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dnaSequence"/> is <c>null</c>.</exception>
    public static string GetReverseComplement(string dnaSequence)
    {
        if (dnaSequence == null)
        {
            throw new ArgumentNullException(nameof(dnaSequence));
        }

        char[] bases = dnaSequence.ToCharArray();
        Array.Reverse(bases);
        for (int i = 0; i < bases.Length; i++)
        {
            switch (bases[i])
            {
                case 'A': bases[i] = 'T'; break;
                case 'T': bases[i] = 'A'; break;
                case 'C': bases[i] = 'G'; break;
                case 'G': bases[i] = 'C'; break;
            }
        }

        return new string(bases);
    }

    /// <summary>
    /// Transcribes DNA to RNA.
    /// </summary>
    /// <param name="dnaSequence">The DNA sequence to transcribe.</param>
    /// <returns>The transcribed RNA sequence, with every <c>T</c> replaced by <c>U</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dnaSequence"/> is <c>null</c>.</exception>
    public static string TranscribeDNAtoRNA(string dnaSequence)
    {
        if (dnaSequence == null)
        {
            throw new ArgumentNullException(nameof(dnaSequence));
        }

        return dnaSequence.Replace('T', 'U');
    }
}
}