-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__main__.py
225 lines (158 loc) · 7.8 KB
/
__main__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import zipfile, copy, collections, os, sys
def myquit():
    """Pause until the user presses Enter, then terminate the program.

    The pause keeps the console window open long enough for the user to
    read the last message when the script was started by double-click.

    Raises:
        SystemExit: always, to end the program.
    """
    try:
        input('Press any key to exit...')
    except EOFError:
        # stdin is closed or exhausted: there is nobody to wait for,
        # so just fall through to the exit.
        pass
    # sys.exit() instead of quit(): quit() is a helper installed by the
    # `site` module for interactive sessions and is not guaranteed to exist.
    sys.exit()
# Third-party dependencies. The page XML is parsed below with
# BeautifulSoup(..., 'xml'), which needs lxml installed as its XML backend,
# so both packages are checked and the user gets an install hint instead of
# a raw traceback when one of them is missing.
try:
    import lxml
except ImportError:
    print('Please run the following command in your command line (Internet connection is required)')
    print('pip install lxml\n')
    myquit()
try:
    from bs4 import BeautifulSoup
except ImportError:
    print('Please run the following command in your command line (Internet connection is required)')
    print('pip install beautifulsoup4\n')
    myquit()
# Path that can be handed back to the interpreter to re-launch this program.
# When this file is the `__main__.py` of a directory or zip archive, that is
# the containing directory/archive; otherwise it is the script file itself.
# os.path is used instead of splitting on '\\' so the result is also right
# for absolute paths, nested relative paths and non-Windows separators.
if os.path.basename(__file__) == '__main__.py' and os.path.dirname(__file__):
    archive_file_name = os.path.dirname(__file__)
else:
    archive_file_name = __file__
# Name of the class written in front of every record of the result file.
# It comes from the first command-line argument when one is given (this is
# how a restarted run restores its state), otherwise the user is asked.
class_name: str = (
    sys.argv[1]
    if len(sys.argv) > 1
    else input('\nType your class name: (press enter for default: "Question")\n-> ')
)
# an empty answer (or an empty argument) selects the default name
if class_name == '':
    class_name = 'Question'
# Reduce a path to the bare name of its last component.
# input : file path string (like '/pages/page1.xml')
# output: that component with its last extension removed (like 'page1')
def get_file_name(file_path:str)->str:
    last_component = os.path.basename(file_path)
    stem, _extension = os.path.splitext(last_component)
    return stem
class extract_from_visio_file:
    """Read one page of a Visio drawing and turn its connections into records.

    The drawing is opened as a zip archive, the user picks one of the XML
    files under ``visio/pages`` and the ``Shape`` / ``Connect`` elements of
    that page are collected into ``object_map``:

        shape id -> {'story': text of the shape,
                     'link_texts': texts of the connectors leaving it,
                     'ids': ids of the shapes those connectors lead to}

    Attributes set by the constructor:
        file_name: the given file name without its extension.
        b: the parsed page (BeautifulSoup document).
        shapes: every ``Shape`` element of the page.
        connects: every ``Connect`` element under ``Connects``.
        connect_pairs: ``Connect`` elements grouped by their ``FromSheet`` id.
        object_map: the result mapping described above.
    """

    def __init__(self, file_name: str):
        """Open ``file_name``, let the user choose a page and parse it.

        Ends the program through ``myquit()`` when the file cannot be opened
        as a zip archive or contains no page files.
        """
        self.file_name = os.path.splitext(file_name)[0]
        try:
            archive = zipfile.ZipFile(file_name, 'r')
        # if its a bad zip error it means user open old visio files that python can not read
        except zipfile.BadZipFile:
            print('\nSelected file is for earlier version of visio (2003-2010) that is not supported.')
            myquit()
        except Exception:
            # anything else (missing file, permissions, ...) is reported the
            # same way; SystemExit/KeyboardInterrupt are deliberately not caught
            print('Unknown error while opening file...')
            myquit()
        # the context manager closes the archive on every path, including exits
        with archive:
            self._load_page(archive)
        self._build_object_map()

    def _load_page(self, archive):
        """Prompt for a page until one that has a ``Connects`` element is parsed."""
        # list all files in the pages directory except pages.xml
        eligible_files = [
            info for info in archive.infolist()
            if os.path.split(info.filename)[0] == 'visio/pages'
            and info.filename != 'visio/pages/pages.xml'
        ]
        if not eligible_files:
            print('ERROR - no pages were found inside the selected Visio file')
            myquit()
        # pick the heaviest item name (offered as the default answer)
        heaviest_item_name = get_file_name(
            sorted(eligible_files, key=lambda info: info.file_size)[-1].filename
        )
        # Retry in-process instead of spawning a new interpreter through the
        # shell: no quoting problems with file names and no dependency on a
        # `python` executable being on PATH.
        while True:
            print('\nAvailable pages:\n(usually is the heaviest one)')
            for info in eligible_files:
                print(f'- {get_file_name(info.filename)}\t {info.file_size} \t bytes')
            # ask user to pick a page
            worksheet_name = input(
                f'\nType visio page name: (press enter for default: "{heaviest_item_name}") \n-> '
            ).lower()
            print('')
            # if user doesnt enter any page name -> use the heaviest page name
            if worksheet_name == '':
                worksheet_name = heaviest_item_name
            # try to read the entered page
            try:
                page_xml = archive.read(f'visio/pages/{worksheet_name}.xml').decode()
            except Exception:
                # a KeyError here means the archive has no such member; other
                # read/decode failures are reported to the user the same way
                print('Counter error while reading zip file. Probably page name is invalid.')
                continue
            self.b = BeautifulSoup(page_xml, 'xml')
            self.shapes = self.b.find_all('Shape')
            # some pages do not have any connection at all
            connects_root = self.b.find('Connects')
            if connects_root is not None:
                self.connects = connects_root.find_all('Connect')
                return
            print(f'there is nothing in "{worksheet_name}" page - retry with another page')

    def _build_object_map(self):
        """Fill ``connect_pairs`` and ``object_map`` from ``self.connects``."""
        # prepare a template for connected topics
        self.connect_pairs = collections.defaultdict(list)
        # prepare a final result template (the factory builds a fresh dict
        # with fresh lists on every call, so no copying is needed)
        self.object_map = collections.defaultdict(
            lambda: {'story': '', 'link_texts': [], 'ids': []}
        )
        # gather two way connections -> group by attribute "FromSheet"
        for connect in self.connects:
            self.connect_pairs[int(connect['FromSheet'])].append(connect)
        # iterate over all connections
        for connector_id, pair in self.connect_pairs.items():
            # a connector glued at one end only has no source/target pair
            if len(pair) < 2:
                continue
            # the entry whose FromCell is 'EndX' names the target shape,
            # the other entry names the source shape
            if pair[0]['FromCell'] == 'EndX':
                target_id = int(pair[0]['ToSheet'])
                source_id = int(pair[1]['ToSheet'])
            else:
                source_id = int(pair[0]['ToSheet'])
                target_id = int(pair[1]['ToSheet'])
            # text written on the connector itself (option)
            link_text = self.get_shape_text(self.find_shape_by_id(connector_id))
            # store a shape's story only while it is still empty, so it is
            # not looked up again for every connection (source first: the
            # insertion order decides the order of the output records)
            for shape_id in (source_id, target_id):
                if self.object_map[shape_id]['story'] == '':
                    self.object_map[shape_id]['story'] = self.get_shape_text(
                        self.find_shape_by_id(shape_id)
                    )
            # place link text in result template
            self.object_map[source_id]['link_texts'].append(link_text)
            # place link id in result template
            self.object_map[source_id]['ids'].append(target_id)

    def find_shape_by_id(self, shape_id):
        """Return the first ``Shape`` element whose ``ID`` equals ``shape_id``.

        ``shape_id`` may be an int or a string; the result of the underlying
        ``find`` call is returned unchanged (``None`` when nothing matches).
        """
        # attribute values are compared as text, so make sure the id is a string
        return self.b.find('Shape', {'ID': str(shape_id)})

    def get_shape_text(self, shape_element):
        """Return the stripped text of the element's ``Text`` child.

        Returns an empty string when ``shape_element`` is ``None`` or has no
        ``Text`` child.
        """
        if shape_element is None:
            return ''
        text_element = shape_element.find('Text')
        if text_element is None:
            return ''
        return (text_element.text or '').strip()

    def to_string(self):
        """Render ``object_map`` as one ``class_name(...)`` record per entry.

        Records are joined with ``',\\n'``. The story is written as a quoted
        and escaped literal (``repr``), like the items of the link-text
        list, so quotes or line breaks inside a story cannot break the record.
        """
        return ',\n'.join(
            f"{class_name}({index}, {item['story']!r}, {item['link_texts']}, {item['ids']})"
            for index, item in self.object_map.items()
        )

    def save_to_disk(self):
        """Write ``to_string()`` to ``<file_name>.txt`` without overwriting.

        When the target file already exists an error is printed and the
        program ends through ``myquit()``. Write failures are reported on
        the console and not raised.
        """
        # save result file same name as opened file but with 'txt' format
        result_file_name = self.file_name + '.txt'
        # check whether there is already a file with the result file name
        if os.path.exists(result_file_name):
            print(f'ERROR - Could not save the result file - there is already a "{result_file_name}" file in the directory')
            myquit()
        try:
            # explicit UTF-8 so non-ASCII story text does not depend on the
            # platform's default encoding
            with open(result_file_name, 'w', encoding='utf-8') as file:
                file.write(self.to_string())
            print(f'Result file ("{result_file_name}") saved successfully.')
        except (OSError, UnicodeError):
            print(f'Counter error while saving "{result_file_name}" to disk...')
            print('Retry or apply it on a copy of your file\n')
# Visio file to convert: taken from the second command-line argument when
# one is given (this is how a restarted run restores its state), otherwise
# asked from the user.
target_file_name: str = sys.argv[2] if len(sys.argv) > 2 else ''
# Keep asking in-process until the name points to an existing file, instead
# of re-launching the program through a shell command.
while True:
    if target_file_name == '':
        target_file_name = input('\nType Visio file name with extension (like "1.vsdx"):\n-> ')
    # isfile (not exists): a directory name is not a usable drawing either
    if os.path.isfile(target_file_name):
        break
    print('\nERROR - either you enter invalid file name or the file is not in the directory that holds this file')
    target_file_name = ''
a = extract_from_visio_file(target_file_name)
a.save_to_disk()
# make program doesnt end until user wants to
myquit()