-
Notifications
You must be signed in to change notification settings - Fork 1
/
baseline.py
123 lines (99 loc) · 5.75 KB
/
baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
import util
class Baseline(object):
    """Shared scaffolding for dosing baselines.

    Holds the two lookup dictionaries handed to the constructor and keeps a
    running record of prediction mistakes made during ``train``. Subclasses
    are expected to override ``evaluate_datum``; the version defined here
    does nothing and returns ``None``.
    """

    def __init__(self, columns_dict=None, values_dict=None):
        # One entry per trained sample: cumulative mistake count, and that
        # count divided by the number of samples seen so far.
        self.regret = []
        self.error_rate = []
        # Stored untouched; only subclasses read them.
        self.values_dict = values_dict
        self.columns_dict = columns_dict

    def train(self, data, labels):
        """
        Predict every row of data and extend the regret / error-rate history.

        A prediction that differs from labels[i] adds 1 to the cumulative
        regret; a matching one adds 0. Nothing is returned.
        """
        for idx in range(len(data)):
            predicted = self.evaluate_datum(data[idx])
            so_far = self.regret[-1] if len(self.regret) != 0 else 0
            if predicted == labels[idx]:
                penalty = 0
            else:
                penalty = 1
            self.regret.append(so_far + penalty)
            seen = len(self.regret)
            self.error_rate.append(self.regret[-1] / seen)

    def evaluate(self, data):
        """
        Given a data (NxM) input, predict a dose for each row.

        Returns a 1-D numpy array holding one label per row.
        """
        count = len(data)
        doses = np.zeros(count)
        for idx in range(count):
            doses[idx] = self.evaluate_datum(data[idx])
        return doses

    def evaluate_datum(self, datum):
        """
        Given a single data input, return the corresponding dose.

        Placeholder: the base class returns None.
        """
        return None

    def get_regret(self):
        """Return the list of cumulative mistake counts recorded by train."""
        return self.regret

    def get_error_rate(self):
        """Return the list of running mistake fractions recorded by train."""
        return self.error_rate
class Fixed_Dose(Baseline):
    """Baseline that ignores its input and always predicts the same dose bucket."""

    def __str__(self):
        label = "Fixed"
        return label

    def evaluate_datum(self, datum):
        """Return ``util.bucket(35)`` regardless of what ``datum`` contains."""
        constant_dose = 35
        return util.bucket(constant_dose)
# Weights can be found in 'data/appx.pdf' section 1f
class Warfarin_Clinical_Dose(Baseline):
    """Baseline that predicts a dose bucket from a fixed linear formula.

    ``columns_dict`` is used to find each feature's position inside a datum
    and ``values_dict`` to find the encoding of each categorical value.
    """

    def __str__(self):
        return "WarfarinClinicalDose"

    def _get_enzyme_inducer_status(self, datum):
        """Return a truthy flag when any of the three drug columns holds the encoding of '1'."""
        inducers = (
            'Carbamazepine (Tegretol)',
            'Phenytoin (Dilantin)',
            'Rifampin or Rifampicin',
        )
        taking_any = False
        for drug in inducers:
            taking_any |= datum[self.columns_dict[drug]] == self.values_dict[drug]['1']
        return taking_any

    # Original author note: FOR MISSING WEIGHT/HEIGHT use avg.
    # (No imputation is performed inside this method.)
    def evaluate_datum(self, datum):
        """
        Given a data input, return the corresponding dose bucket.

        Terms are accumulated in a fixed order: intercept, numeric features,
        race indicators, enzyme-inducer flag, amiodarone flag.
        """
        cols, vals = self.columns_dict, self.values_dict
        amiodarone = 'Amiodarone (Cordarone)'

        numeric_terms = (
            (-0.2546, 'Age'),
            (0.0118, 'Height (cm)'),
            (0.0134, 'Weight (kg)'),
        )
        race_terms = (
            (-0.6752, 'Asian'),
            (0.4060, 'Black or African American'),
            (0.0443, 'NA'),
            (0.0443, 'Unknown'),
        )

        sqrt_weekly = 4.0376
        for coefficient, feature in numeric_terms:
            sqrt_weekly += coefficient * datum[cols[feature]]
        for coefficient, category in race_terms:
            sqrt_weekly += coefficient * (datum[cols['Race']] == vals['Race'][category])
        sqrt_weekly += 1.2799 * self._get_enzyme_inducer_status(datum)
        sqrt_weekly -= 0.5695 * (datum[cols[amiodarone]] == vals[amiodarone]['1'])

        # appx.pdf states the formula yields the sqrt of the weekly dose,
        # hence the squaring before bucketing.
        return util.bucket(sqrt_weekly ** 2)
# Weights can be found in 'data/appx.pdf' section 1f
class Warfarin_Pharmacogenetic_Dose(Baseline):
    """Baseline that predicts a dose bucket from clinical and genotype features.

    ``columns_dict`` is used to find each feature's position inside a datum
    and ``values_dict`` to find the encoding of each categorical value.
    The linear formula yields the square root of the weekly dose (see
    'data/appx.pdf'), so the result is squared before being bucketed.
    """

    def __str__(self):
        return "WarfarinPharmacogeneticDose"

    def _get_enzyme_inducer_status(self, datum):
        """Return a truthy flag when any of the three drug columns holds the encoding of '1'."""
        status = False
        for drug in ('Carbamazepine (Tegretol)',
                     'Phenytoin (Dilantin)',
                     'Rifampin or Rifampicin'):
            status |= datum[self.columns_dict[drug]] == self.values_dict[drug]['1']
        return status

    # Original author note: FOR MISSING WEIGHT/HEIGHT use avg.
    # (No imputation is performed inside this method.)
    def evaluate_datum(self, datum):
        """
        Given a data input, return the corresponding dose bucket.

        Every categorical term is ``coefficient * (cell == encoded value)``.
        The comparison must be parenthesised: ``*`` binds tighter than
        ``==``, so without parentheses the whole product would be compared
        and a bare 0/1 subtracted instead of the coefficient.
        """
        cols = self.columns_dict
        vals = self.values_dict

        dose = 5.6044
        dose -= 0.2614 * datum[cols['Age']]
        dose += 0.0087 * datum[cols['Height (cm)']]
        dose += 0.0128 * datum[cols['Weight (kg)']]

        # VKORC1: a genotype counts if either the primary or the QC column
        # reports it; it counts as missing only if both columns are 'NA'.
        vk_gene = 'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'
        vk_gene2 = 'VKORC1 QC genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'
        dose -= 0.8677 * (datum[cols[vk_gene]] == vals[vk_gene]['A/G'] or
                          datum[cols[vk_gene2]] == vals[vk_gene2]['A/G'])
        dose -= 1.6974 * (datum[cols[vk_gene]] == vals[vk_gene]['A/A'] or
                          datum[cols[vk_gene2]] == vals[vk_gene2]['A/A'])
        dose -= 0.4854 * (datum[cols[vk_gene]] == vals[vk_gene]['NA'] and
                          datum[cols[vk_gene2]] == vals[vk_gene2]['NA'])

        # CYP2C9: one indicator term per genotype.
        cyp = 'CYP2C9 consensus'
        cyp_terms = (
            (0.5211, '*1/*2'),
            (0.9357, '*1/*3'),
            (1.0616, '*2/*2'),
            (1.9206, '*2/*3'),
            (2.3312, '*3/*3'),
            (0.2188, 'NA'),
        )
        for coefficient, genotype in cyp_terms:
            dose -= coefficient * (datum[cols[cyp]] == vals[cyp][genotype])

        # Race: 'NA' and 'Unknown' share one coefficient, matching how
        # Warfarin_Clinical_Dose treats those two encodings.
        race = datum[cols['Race']]
        dose -= 0.1092 * (race == vals['Race']['Asian'])
        dose -= 0.2760 * (race == vals['Race']['Black or African American'])
        dose -= 0.1032 * (race == vals['Race']['NA'])
        dose -= 0.1032 * (race == vals['Race']['Unknown'])

        dose += 1.1816 * self._get_enzyme_inducer_status(datum)

        # Amiodarone: use an indicator against the encoding of '1' rather
        # than the raw encoded cell value.
        amiodarone = 'Amiodarone (Cordarone)'
        dose -= 0.5503 * (datum[cols[amiodarone]] == vals[amiodarone]['1'])

        # dose calculated in appx.pdf states that it's the sqrt of weekly
        return util.bucket(dose ** 2)