-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathData_Extraction_Revised.py
49 lines (44 loc) · 1.58 KB
/
Data_Extraction_Revised.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import xml.etree.ElementTree as ET
import numpy as np
import os
import time
# Folder holding the labelled-flow XML files. Backslashes are written as
# explicit escapes; the runtime value of the string is unchanged.
import_directory = 'C:\\Users\\Tamim Mirza\\Documents\\ISCX\\labeled_flows_xml\\'

# Number of bytes kept per payload: 50 * 50 * 3 = 7500.
actual = (50**2) * 3


def payload_to_vector(text, length=actual):
    """Return the UTF-8 bytes of *text* as a uint8 vector of exactly *length*.

    Payloads longer than *length* are truncated; shorter ones are repeated
    cyclically until they fill *length* bytes. Sizing is done on the encoded
    bytes, so non-ASCII payloads also give exactly *length* entries.

    Raises:
        ValueError: if *text* is empty (it cannot be repeated to any length).
    """
    raw = text.encode('utf-8')
    if not raw:
        raise ValueError('cannot build a payload vector from empty text')
    if len(raw) < length:
        # Ceiling division: the smallest repeat count that reaches `length`.
        raw = raw * -(-length // len(raw))
    # frombuffer yields a read-only view of the bytes; copy to get a
    # writable array.
    return np.frombuffer(raw[:length], dtype=np.uint8).copy()


def extract_flows(root, length=actual):
    """Collect ``(payload_vector, label)`` pairs from a parsed XML tree.

    Every child of *root* is scanned for ``destinationPayloadAsUTF``
    sub-elements that carry text. The label is 0 when the text of the
    child's ``Tag`` element is ``'Normal'`` and 1 otherwise.

    Raises:
        AttributeError: if a child with a payload has no ``Tag`` element.
    """
    rows = []
    for flow in root:
        for field in flow:
            if field.tag != 'destinationPayloadAsUTF':
                continue
            if not field.text:
                # No payload text (None or empty): nothing to encode.
                continue
            label = 0 if flow.find('Tag').text == 'Normal' else 1
            rows.append((payload_to_vector(field.text, length), label))
    return rows


def build_dataset(rows):
    """Pack ``(vector, label)`` pairs into an ``(N, 2)`` array.

    Column 0 holds the payload vectors and column 1 the integer labels.
    With no rows, an empty ``(0, 2)`` float array is returned.
    """
    if not rows:
        return np.empty((0, 2))
    # Fill an object array cell by cell: a row mixes an ndarray with a
    # scalar, which NumPy will not assemble into a regular array on its own.
    data = np.empty((len(rows), 2), dtype=object)
    for idx, (vector, label) in enumerate(rows):
        data[idx, 0] = vector
        data[idx, 1] = label
    return data


def main():
    """Convert every XML file in ``import_directory`` into a saved array."""
    files = os.listdir(import_directory)
    errors = []
    counter = 0
    start_time = time.time()
    for file in files:
        print(file)
        try:
            tree = ET.parse(import_directory + file)
        except Exception:
            # Best effort: record the file that could not be opened or
            # parsed and move on. Unlike a bare `except`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            errors.append(file)
            continue
        print('Reading File ', file)
        rows = extract_flows(tree.getroot())
        counter += len(rows)
        print('Time taken: {}'.format(time.time() - start_time))
        start_time = time.time()
        # Rows are gathered in a list and packed once per file rather than
        # growing an array row by row.
        np.save('Database2\\destinationPayload_' + file, build_dataset(rows))
    print('Error in Opening Files = ', errors)
    print('Counter = ', counter)
    print('DONE!')


if __name__ == '__main__':
    main()