-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_to_R.py
118 lines (99 loc) · 3.14 KB
/
data_to_R.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import wfdb
import numpy as np
import csv
import pickle
import pandas as pd
import signalFeatures as sf
# Example of the "Key value" header comments of one record, in the order
# parsing_data() walks through them:
#   ['Gestation 35',
#    'Rectime 31.3',
#    'Age 30',
#    'Parity 0',
#    'Abortions 0',
#    'Weight 58',
#    'Hypertension no',
#    'Diabetes no',
#    'Placental_position front',
#    'Bleeding_first_trimester no',
#    'Bleeding_second_trimester no',
#    'Funneling negative',
#    'Smoker no']
# 'Rectime' has no entry below because parsing_data() deletes that value
# before building a row.

# Column names of the exported patient table, one per kept header value.
FEATURES = [
    'Gestation',
    'Age',
    'Parity',
    'Abortions',
    'Weight',
    'Hypertension',
    'Diabetes',
    'Placental_position',
    'Bleeding_first_trimester',
    'Bleeding_second_trimester',
    'Funneling',
    'Smoker',
]
def check_data(file_name='actual_features_labels.pkl'):
    '''
    Print every feature entry stored in a pickled (features, labels) file.

    The pickle must contain exactly two items; the first one is iterated and
    each of its elements is printed on its own line. The second item is
    loaded but not used.

    :param file_name: path of the pickle file to inspect. Defaults to
        'actual_features_labels.pkl'; 'features_labels.pkl' is the other
        file this check was written for.
    :return: None
    '''
    # NOTE(security): pickle.load can run arbitrary code while loading, so
    # only point this at files produced by this project.
    with open(file_name, 'rb') as data_file:
        features, _labels = pickle.load(data_file)

    for feature in features:
        print(feature)
def get_records(record_list_file_name="RECORDS.txt", pb_dir='tpehgdb/tpehgdb'):
    """
    get all records named in the record list file from the database

    To cache the result, dump the returned list with pickle into
    'records.pkl', which is the file read_data() loads by default.

    :param record_list_file_name: text file holding one record name per line
    :param pb_dir: database directory handed to wfdb.rdrecord
    :return: a list with one wfdb record per non-blank line
             (300 for the full record list)
    """
    # NOTE(review): newer wfdb releases appear to call this keyword `pn_dir`;
    # confirm against the installed wfdb version before upgrading.
    records = []
    with open(record_list_file_name, "r") as f:
        for line in f:
            # strip() rather than line[:-1]: the slice cut off the last
            # character of the final name when the file has no trailing
            # newline, and left a '\r' behind on CRLF files.
            record_name = line.strip()
            if not record_name:
                # a blank line is not a record name
                continue
            records.append(wfdb.rdrecord(record_name, pb_dir=pb_dir))
    return records
def read_data(file_name='records.pkl'):
    '''
    read the records from the pickle file

    :param file_name: path of the pickle file to load; defaults to
        'records.pkl'
    :return: whatever object was pickled into the file (presumably the
        record list produced by get_records() -- verify against how the
        file was written)
    '''
    # NOTE(security): pickle.load can run arbitrary code while loading, so
    # only load files produced by this project.
    with open(file_name, 'rb') as f:
        return pickle.load(f)
def parsing_data(records=None, pb_dir='tpehgdb/tpehgdb'):
    '''
    Extract the features from records
    n records => DataFrame with n rows and one column per FEATURES entry
    (300 records => 300 x 12)

    For every record the header is fetched with wfdb.rdsamp, the first two
    header comments are skipped, the second whitespace-separated token of
    each remaining comment is kept as its value, and the value at index 1
    ('Rectime' in the sample header) is dropped.

    :param records: iterable of objects exposing a `record_name` attribute
    :param pb_dir: database directory handed to wfdb.rdsamp
    :return: pandas DataFrame of string values with FEATURES as columns
    :raises TypeError: if `records` is None
    :raises ValueError: if a record does not yield exactly len(FEATURES)
        values
    '''
    if records is None:
        # The None default only exists for keyword-style calls; fail with a
        # clear message instead of an opaque "not iterable" error.
        raise TypeError("parsing_data() needs an iterable of records, got None")

    rows = []
    for record in records:
        _, fields = wfdb.rdsamp(record.record_name, pb_dir=pb_dir)
        values = [comment.split()[1] for comment in fields['comments'][2:]]
        # index 1 is the value that has no column in FEATURES
        del values[1]
        if len(values) != len(FEATURES):
            raise ValueError(
                "record %r yielded %d values, expected %d"
                % (record.record_name, len(values), len(FEATURES))
            )
        rows.append(values)

    # Building from a list of rows works for any number of records, unlike
    # the former fixed (300, 12) reshape, and avoids re-copying the whole
    # array on every np.append call.
    return pd.DataFrame(rows, columns=FEATURES)
def export_csv(records_df, file_name='all_patient_records.csv'):
    '''
    Write the records table to a CSV file without the index column and
    print 'done' afterwards.

    :param records_df: pandas DataFrame to export
    :param file_name: destination path; defaults to
        'all_patient_records.csv'
    :return: None
    '''
    records_df.to_csv(file_name, index=False)
    print('done')
if __name__ == "__main__":
    # Disabled pipeline: load the pickled records, parse them into a table
    # and export that table as CSV.
    # print(FEATURES)
    # records = read_data()
    # print('done reading, now parsing')
    # all_records = parsing_data(records=records)
    # print('export to csv')
    # export_csv(records_df=all_records)

    # Active step: hand the .fvl feature file to signalFeatures.rdFVL with an
    # empty index list and the flag value 1, then print what it returns.
    # NOTE(review): the meaning of the last two arguments is defined in
    # signalFeatures, which is not visible from this file.
    fvl_path = "tpehgdb_features__filter_0.08_Hz-4.0_Hz.fvl"
    excluded_indices = []
    print(sf.rdFVL(fvl_path, excluded_indices, 1))