-
Notifications
You must be signed in to change notification settings - Fork 0
/
generateFeatures.py
159 lines (125 loc) · 6.71 KB
/
generateFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import multiprocessing
from qcnn.small_quanv import gen_quanv
import sys
sys.path.append("./stqft")
sys.path.append("./qcnn")
import os
# Activate the CUDA env: make the CUDA/CUPTI library directories visible via
# LD_LIBRARY_PATH. Python does not expand "$LD_LIBRARY_PATH" inside a string
# literal, so the current value is read from os.environ and extended explicitly
# instead of being replaced by a literal "$LD_LIBRARY_PATH" entry.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process
# start, so this presumably only affects child processes -- confirm.
_CUDA_LIBRARY_DIRS = (
    "/usr/local/cuda/lib64/",
    "/usr/lib64",
    "/usr/local/cuda/extras/CUPTI/lib64",
    "/usr/local/cuda-11.2/lib64",
    "/usr/local/cuda/targets/x86_64-linux/lib/",
)
# Keep whatever the caller already configured (empty entries are dropped).
_ld_library_path = [
    entry for entry in os.environ.get("LD_LIBRARY_PATH", "").split(":") if entry
]
# Only add directories that are not listed yet, so importing this module
# more than once does not keep growing the variable.
for _cuda_dir in _CUDA_LIBRARY_DIRS:
    if _cuda_dir not in _ld_library_path:
        _ld_library_path.append(_cuda_dir)
os.environ["LD_LIBRARY_PATH"] = ":".join(_ld_library_path)
import glob
import numpy as np
import time
import pickle
# from multiprocessing import Pool
from multiprocessing import Pool
from stqft.utils import PI
from stqft.frontend import signal, transform
from stqft.stqft import stqft_framework
from stqft.qft import loadBackend, loadNoiseModel, setupMeasurementFitter
from qcnn.small_qsr import gen_train_from_wave, gen_train_from_wave_no_split
from qcnn.small_quanv import gen_quanv
# ---------------------------------------------------------------------------
# Module-level settings. The functions in this file read these values either
# directly or as default arguments.
# ---------------------------------------------------------------------------
av = 0

# Window geometry
nQubits = 10
windowLength = 2 ** nQubits  # derived from nQubits, so it must be assigned after it
overlapFactor = 0.875
windowType = 'blackman'
samplingRate = 16000  # careful: this may be modified when calling gen_features

# Backend / execution settings
backend = "ibmq_guadalupe"  # ibmq_guadalupe, ibmq_melbourne (noisier)
simulation = True
useNoiseModel = True
noiseMitigationOpt = 1
numOfShots = 4096
numOfRuns = 1
transpileOnce = True
transpOptLvl = 1

# Signal filtering
signalThreshold = 0.01  # optimized according to thesis
minRotation = 0.2  # PI/2**(nQubits-4)
fixZeroSignal = False

# Post-processing
scale = 'mel'
normalize = True
nMels = 60
fmin = 40.0

# Switches
suppressPrint = True
enableQuanv = True
def reportSettings():
    """Return the module-level settings as a single ``name:value; ...`` string.

    The values are read from the module globals at call time.
    """
    # Adjacent f-string pieces are concatenated by the parser into one string.
    return (
        f"numOfShots:{numOfShots}; "
        f"signalFilter:{signalThreshold}; "
        f"minRotation:{minRotation}; "
        f"nSamplesWindow:{windowLength}; "
        f"overlapFactor:{overlapFactor}; "
        f"windowType:{windowType}; "
        f"scale:{scale}; "
        f"normalize:{normalize}; "
        f"nMels:{nMels}; "
        f"fmin:{fmin}"
    )
def gen_mel(
    audioFile:str,
    backendInstance=backend,
    noiseModel=None,
    filterResultCounts=None,
    show=False,
    minRotation=minRotation,
    signalThreshold=signalThreshold,
    noiseMitigationOpt=noiseMitigationOpt,
):
    """Run one audio file through the STQFT transform and its post-processing.

    The file is loaded with ``signal``, transformed with a ``transform`` built
    around ``stqft_framework`` and post-processed with the module-level
    ``scale`` / ``normalize`` / ``nMels`` / ``fmin`` settings. Progress and
    the elapsed wall-clock time are printed.

    Args:
        audioFile: Path handed to ``signal`` as ``path``.
        backendInstance: Forwarded to ``transform`` as ``backend``; defaults
            to the module-level ``backend`` value.
        noiseModel: Forwarded to ``transform``.
        filterResultCounts: Forwarded to ``transform``.
        show: When truthy, the post-processed result is displayed through the
            transform's ``show`` method.
        minRotation: Forwarded to ``transform``.
        signalThreshold: Forwarded to ``transform``.
        noiseMitigationOpt: Forwarded to ``transform``.

    The last three defaults are bound from the module-level values when the
    function is defined.

    Returns:
        The first element returned by ``postProcess``.
    """
    print(f"Processing {audioFile}")
    started = time.time()

    # Frontend signal instantiation
    audio = signal(samplingRate=samplingRate, signalType='file', path=audioFile)

    # QFT init -- these parameters are subject of evaluation prior to the
    # training process
    transform_options = dict(
        numOfShots=numOfShots,
        minRotation=minRotation,
        signalThreshold=signalThreshold,
        fixZeroSignal=fixZeroSignal,
        suppressPrint=suppressPrint,
        draw=False,
        simulation=simulation,
        noiseMitigationOpt=noiseMitigationOpt,
        filterResultCounts=filterResultCounts,
        useNoiseModel=useNoiseModel,
        noiseModel=noiseModel,
        backend=backendInstance,
        transpileOnce=transpileOnce,
        transpOptLvl=transpOptLvl,
    )
    pipeline = transform(stqft_framework, **transform_options)

    # STQFT forward pass over the windowed signal
    window_options = dict(
        nSamplesWindow=windowLength,
        overlapFactor=overlapFactor,
        windowType=windowType,
        suppressPrint=suppressPrint,
    )
    raw, freqs, times = pipeline.forward(audio, **window_options)

    # Frontend post processing; fmax is half of the signal's sampling rate
    processed, freqs_p, times_p = pipeline.postProcess(
        raw,
        freqs,
        times,
        scale=scale,
        normalize=normalize,
        samplingRate=audio.samplingRate,
        nMels=nMels,
        fmin=fmin,
        fmax=audio.samplingRate / 2,
    )

    elapsed = time.time() - started
    print(f"Iteration took {elapsed} s")

    if show:
        pipeline.show(processed, freqs_p, times_p, title="STQFT")

    return processed
def poolProcess(labelFileAndBackendInstance:list):
    """Worker entry point: run ``gen_mel`` on one packed argument sequence.

    Args:
        labelFileAndBackendInstance: Positional arguments for ``gen_mel``,
            unpacked with ``*``. ``gen_features`` passes tuples of
            (file, backend instance, noise model, filter result counts).

    Returns:
        The ``gen_mel`` result without index 0 along its second axis and with
        a new axis inserted at position 2.
    """
    mel = gen_mel(*labelFileAndBackendInstance)
    # drop the first entry of the second axis before adding the extra axis
    trimmed = mel[:, 1:]
    return np.expand_dims(trimmed, axis=2)
def gen_features(labels:list, train_audio_path:str, outputPath:str, PoolSize:int, waveformPath:str=None, portion:int=1, split:bool=True):
    """Generate features for all label directories and export them.

    For every entry of ``labels`` the ``*.wav`` files below
    ``{train_audio_path}/{label}/`` are collected, every ``portion``-th file
    is processed by ``poolProcess`` in a ``Pool`` with ``PoolSize`` workers,
    and the results are accumulated together with one label entry per file.

    Args:
        labels: Label names; each one is used as a sub-directory name of
            ``train_audio_path``.
        train_audio_path: Directory that contains one sub-directory per label.
        outputPath: Forwarded as ``output`` to the export function.
        PoolSize: Number of worker processes handed to ``Pool``.
        waveformPath: Optional directory; when given, the collected results
            and labels are pickled to ``waveforms.pckl`` and ``labels.pckl``
            inside it.
        portion: Slice step applied to the file list of each label
            (``1`` keeps every file, ``2`` every second one, ...).
        split: Selects ``gen_train_from_wave`` (truthy) or
            ``gen_train_from_wave_no_split`` (falsy) for the export.

    Returns:
        Whatever the selected export function returns.
    """
    all_wave = []
    all_labels = []

    # need to do some pre-initialization mostly because of api restrictions and resources concerns
    _, backendInstance = loadBackend(backendName=backend, simulation=simulation)
    _, noiseModel = loadNoiseModel(backendName=backendInstance)
    if noiseMitigationOpt != 0:
        filterResultCounts = setupMeasurementFitter(backendInstance, noiseModel,
                                                    transpOptLvl=transpOptLvl, nQubits=nQubits,
                                                    nShots=numOfShots, nRuns=numOfRuns,
                                                    suppressPrint=suppressPrint)
    else:
        filterResultCounts = None

    # iterate over labels, so we don't run into concurrency issues with the mapping;
    # counting starts at 1 so the banner runs from 1/N to N/N
    for i, label in enumerate(labels, start=1):
        print(f"\n---------[Label {i}/{len(labels)}]---------\n")

        # gather all label specific sample files
        datasetLabelFiles = glob.glob(f"{train_audio_path}/{label}/*.wav")
        # TODO: maybe change that to "random"?
        # get only a portion of those files (validated)
        portDatsetLabelFiles = datasetLabelFiles[0::portion]
        print(f"\nUsing {len(portDatsetLabelFiles)} out of {len(datasetLabelFiles)} files for label '{label}'\n")

        # one argument tuple per file; the shared objects are repeated in each tuple
        jobs = [
            (audioFile, backendInstance, noiseModel, filterResultCounts)
            for audioFile in portDatsetLabelFiles
        ]
        # mapping samples to processes and output back to waveform array
        # (validated) when running "single threaded" in the multiprocessing.dummy module with PoolSize=1
        # (validated) when running in standard multiprocessing module with PoolSize=3
        with Pool(PoolSize) as p:
            temp_waves = p.map(poolProcess, jobs)

        # appending waves and labels at the END of both lists; extend() copies
        # the element references, so no extra list copy is needed
        all_wave.extend(temp_waves)
        all_labels.extend([label] * len(portDatsetLabelFiles))

        print(f"\n Generated {len(temp_waves)} waves. In total {len(all_wave)} waves and {len(all_labels)} labels\n")

    tid = time.time()
    print(f"Finished generating waveforms at {tid}")

    if waveformPath is not None:
        with open(f"{waveformPath}/waveforms.pckl", 'wb') as fid:
            pickle.dump(all_wave, fid, pickle.HIGHEST_PROTOCOL)
        with open(f"{waveformPath}/labels.pckl", 'wb') as fid:
            pickle.dump(all_labels, fid, pickle.HIGHEST_PROTOCOL)
        print("Finished dumping cache")

    print("Starting Feature export")
    # dirty decision, but useful when called from test.py (where we don't need to split)
    if split:
        return gen_train_from_wave(all_wave=all_wave, all_label=all_labels, output=outputPath)
    return gen_train_from_wave_no_split(all_wave=all_wave, all_label=all_labels, output=outputPath)
def gen_quantum(x_train, x_valid, kr, output, poolSize=1, quanv=enableQuanv):
    """Forward all arguments unchanged to ``gen_quanv`` and return its result.

    ``quanv`` defaults to the module-level ``enableQuanv`` value that is bound
    when this function is defined.
    """
    # simple pass-through
    result = gen_quanv(x_train, x_valid, kr, output, poolSize, quanv=quanv)
    return result