-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_icd10.py
executable file
·100 lines (84 loc) · 2.72 KB
/
parse_icd10.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/python3
# Python 3.6.0 (default, Jan 16 2017, 12:12:55)
# [GCC 6.3.1 20170109] on linux
# parse SAS scripts
import os
import pickle
import re
# read format script
# Loads every line of the SAS format script into `comformat` (a list of raw
# lines, newline included) for the parsing pass that follows.
fName = "hcup/comformat_icd10cm_2017.txt"
try:
    # The context manager closes the handle as soon as the lines are read,
    # instead of leaving it open for the rest of the run.
    with open(fName) as fhand:
        comformat = fhand.readlines()
except OSError:
    # Only I/O failures (missing file, permissions, ...) are reported here;
    # anything else is a genuine bug and is allowed to surface.
    print("Error: failed to open/find", fName)
    # Non-zero status so callers can detect the failure; SystemExit is always
    # available, unlike the exit() helper that the site module installs.
    raise SystemExit(1)
# define output file name
# `outFile` is a template: the "subme" placeholder is replaced by each
# dictionary's name when the pickles are written.
outDir = "dict/"
# exist_ok=True makes the call a no-op when the directory is already there,
# without the check-then-create race of testing os.path.exists() first.
os.makedirs(outDir, exist_ok=True)
outFile = outDir + "icd10_subme.pickle"
# define dictionary based on key word: value
# Single pass over the script. Results (all indices are positions in
# comformat):
#   valueRun       - index of each line containing "value", plus each line
#                    starting with "run" (lines starting with "proc"/"other"
#                    are never recorded)
#   equalSignLines - index of each recorded line containing "="
#   dictNames      - second whitespace-separated token of each "value" line
#   dictValues     - label right of the first "=" on each such line, with
#                    spaces and "=" removed (parallel to equalSignLines)
# comformat is rewritten in place: every entry becomes its cleaned,
# lower-cased text, and "=" lines keep only the key text left of the "=".
valueRun = []
equalSignLines = []
dictNames = []
dictValues = []
for i, rawLine in enumerate(comformat):
    line = rawLine.strip().lower()
    # ignore comments
    # Comments go first (non-greedy, so text between two comments on the same
    # line survives); only afterwards can the ",$" / ";$" anchors see the
    # real end of the statement.
    line = re.sub(r'/\*.*?\*/', "", line).strip()
    line = re.sub(r'"|\$|,$|;$', "", line).strip()
    comformat[i] = line
    if line.startswith(("proc", "other")):
        continue
    # NOTE(review): this matches "value" anywhere in the line and assumes the
    # format name follows on the same line - confirm against the input file.
    if 'value' in line:
        dictNames.append(line.split()[1])
        valueRun.append(i)
    if '=' in line:
        equalSignLines.append(i)
        # Split once at the first "=" rather than deleting the label text
        # from the whole line with a regex: that treated the label as a
        # pattern and also erased it from inside the keys.
        keyPart, _, valuePart = line.partition("=")
        value = re.sub(r'=|[ ]', "", valuePart)
        dictValues.append(value)
        comformat[i] = keyPart.replace(" ", "")
    if comformat[i].startswith("run"):
        valueRun.append(i)
# create dictionaries
# Consecutive entries of valueRun bracket the body of one format. Every
# non-empty line inside that body holds a key (or a list of keys) whose label
# is the one recorded for the nearest "=" line at or below it.
k = 0  # position in equalSignLines / dictValues of the label in effect
for i, (dictName, start, stop) in enumerate(
        zip(dictNames, valueRun, valueRun[1:])):
    # The dictionary is bound at module level under the format's own name,
    # exactly where exec(name + " = dict()") used to put it, but without
    # executing code assembled from file content.
    table = globals()[dictName] = {}
    for j in range(start + 1, stop):
        # Move to the first "=" line at or after j. The bounds check keeps
        # trailing lines after the last "=" from raising IndexError, and the
        # loop (rather than a single step) re-synchronises if an "=" line
        # was never visited.
        while k < len(equalSignLines) and equalSignLines[k] < j:
            k += 1
        key = comformat[j]
        if not key or key.startswith("other"):
            continue
        if k == len(equalSignLines):
            raise ValueError(
                'no "=" line follows key ' + repr(key)
                + " (line " + str(j + 1) + ")")
        label = dictValues[k]
        # for rcomfmt
        if i == 0:
            # the first format is keyed by the cleaned line text itself
            table[key] = label
        # for the rest
        else:
            # comma-separated integers or inclusive "low-high" ranges;
            # int() tolerates the blanks left around each piece
            for oneKey in key.split(","):
                bounds = [int(part) for part in oneKey.split("-")]
                if len(bounds) > 1:
                    for number in range(bounds[0], bounds[1] + 1):
                        table[number] = label
                else:
                    table[bounds[0]] = label
# save generated dictionaries into pickles
# Writes one pickle per dictionary (path = outFile with "subme" replaced by
# the dictionary name) and lists every pickle path, one per line, in the
# manifest file icd10_dictionaries.txt.
fName = "icd10_dictionaries.txt"
try:
    # `with` closes the manifest even when a later step fails.
    with open(fName, "w") as fout:
        for oneDict in dictNames:
            # The dictionaries live at module level under their own names;
            # look the object up directly instead of exec-ing source text.
            # Done before any file is opened so a missing dictionary does not
            # leave an empty pickle behind.
            table = globals()[oneDict]
            outName = outFile.replace("subme", oneDict)
            fout.write(outName + "\n")
            with open(outName, 'wb') as handle:
                pickle.dump(table, handle, protocol=pickle.HIGHEST_PROTOCOL)
except OSError as err:
    # Name the file that actually failed (manifest or pickle). Non-I/O errors
    # are left to propagate instead of being reported as a manifest problem.
    print("Error: failed to create", err.filename or fName)
    # Non-zero status so callers can detect the failure.
    raise SystemExit(1)
print("ICD-10 dictionaries have been successfully generated.")