This repository has been archived by the owner on Sep 29, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtk3_sl.n
258 lines (258 loc) · 30.3 KB
/
tk3_sl.n
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/usr/bin/env batchflow
<?xml version="1.0"?>
<Document>
<!--
  MAIN: top-level subnet.
  Instantiates a single MAIN_LOOP node (LENGTH=512) and re-exports its four
  terminals (ASR, SaveWavePCM, OUTPUT_1, VALUE) as the outputs of the document.
  Markup characters inside attribute values are written as entities
  (&lt; &gt;) so the file stays well-formed XML; parsers see the same values.
-->
<Network type="subnet" name="MAIN">
<Node name="node_MAIN_LOOP_1" type="MAIN_LOOP" x="100" y="100">
<Parameter name="LENGTH" type="int" value="512" description="The frame length of each channel (in samples) [default: 512]."/>
</Node>
<NetOutput name="ASR" node="node_MAIN_LOOP_1" terminal="ASR" object_type="any" description="Dynamic"/>
<NetOutput name="SaveWavePCM" node="node_MAIN_LOOP_1" terminal="SaveWavePCM" object_type="Map&lt;int,ObjectRef&gt;" description="The same as input."/>
<NetOutput name="OUTPUT_1" node="node_MAIN_LOOP_1" terminal="OUTPUT_1" object_type="ObjectRef" description="This is a dummy output, and it has no mean. Only for an activation of this module."/>
<NetOutput name="VALUE" node="node_MAIN_LOOP_1" terminal="VALUE" object_type="int" description="int parameter"/>
</Network>
<!--
  MAIN_LOOP: iterator network; its NetCondition is the NOT_EOF terminal of
  node_AudioStreamFromMic_1.

  Data flow, as wired by the Link elements below:
    AudioStreamFromMic.AUDIO   to MultiFFT.INPUT
    MultiFFT.OUTPUT            to sub_localization.WAV, sub_separation.SPEC,
                                  CMMakerFromFFT.INPUT
    sub_localization.OUTPUT    to sub_separation.SRC_INFO,
                                  sub_recognition.SRC_INFO,
                                  RosHarkMsgsPublisher.SRC_INFO
    sub_separation.OUTPUT      to Synthesize.INPUT, sub_recognition.SPEC
    Synthesize.OUTPUT          to SaveWavePCM.INPUT,
                                  RosHarkMsgsPublisher.SRC_WAVE

  Exposed outputs: SaveWavePCM, ASR, VALUE (from RosNodeGenerator) and
  OUTPUT_1 (from RosHarkMsgsPublisher).

  NOTE(review): node_CMMakerFromFFT_1 receives MultiFFT output but has no
  outgoing Link or NetOutput in this network; confirm whether it is meant to
  stay unconnected.
  NOTE(review): only node_MultiFFT_1 reads LENGTH via subnet_param; the other
  nodes hard-code 512, so changing LENGTH in MAIN alone would leave them at 512.
-->
<Network type="iterator" name="MAIN_LOOP">
<Node name="node_MultiFFT_1" type="MultiFFT" x="480" y="100">
<Parameter name="LENGTH" type="subnet_param" value="LENGTH" description="FFT length in sample. [default: 512]"/>
<Parameter name="WINDOW" type="string" value="CONJ" description="A window function for FFT. WINDOW should be CONJ, HAMMING, RECTANGLE, or HANNING. [default: CONJ]"/>
<Parameter name="WINDOW_LENGTH" type="subnet_param" value="LENGTH" description="Window length of the window function. [default: 512]"/>
</Node>
<Node name="node_sub_separation_1" type="sub_separation" x="840" y="260">
</Node>
<Node name="node_sub_recognition_1" type="sub_recognition" x="950" y="110">
<Parameter name="FBANK_COUNT" type="int" value="13" description="The size of the input feature vector."/>
<Parameter name="LENGTH" type="int" value="512" description="Size of window length in sample. [default: 512]"/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate in Hz. [default: 16000]"/>
</Node>
<Node name="node_sub_localization_1" type="sub_localization" x="560" y="230">
<Parameter name="LENGTH" type="int" value="512" description="The length of a frame (per channel)."/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling Rate (Hz)."/>
</Node>
<Node name="node_SaveWavePCM_1" type="SaveWavePCM" x="1300" y="260">
<Parameter name="BASENAME" type="string" value="sep_files/sep_" description="Basename of files. [default: sep_]"/>
<Parameter name="ADVANCE" type="int" value="160" description="The shift length beween adjacent frames (in samples)[default: 160]."/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate (in samples)[default: 16000]."/>
<Parameter name="BITS" type="string" value="int24" description="Bit format of samples. int16 , int24 and float32 bits are supported."/>
<Parameter name="INPUT_BITS" type="string" value="auto" description="Bit format of input wav file."/>
</Node>
<Node name="node_Synthesize_1" type="Synthesize" x="1110" y="260">
<Parameter name="LENGTH" type="int" value="512" description="Size of window length in sample. [default: 512]"/>
<Parameter name="ADVANCE" type="int" value="160" description="The length in sample between a frame and a previous frame. [default: 160]"/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate (Hz) [default: 16000]."/>
<Parameter name="MIN_FREQUENCY" type="int" value="125" description="Minimum frequency (Hz) [default: 125]"/>
<Parameter name="MAX_FREQUENCY" type="int" value="7900" description="Maximum frequency (Hz) [default: 7900]"/>
<Parameter name="WINDOW" type="string" value="HAMMING" description="A window function for overlap-add. WINDOW should be CONJ, HAMMING, RECTANGLE, or HANNING. [default: HAMMING]"/>
<Parameter name="OUTPUT_GAIN" type="float" value="1.0" description="Output gain factor. [default: 1.0]"/>
</Node>
<Node name="node_AudioStreamFromMic_1" type="AudioStreamFromMic" x="160" y="100">
<Parameter name="LENGTH" type="int" value="512" description="The frame length of each channel (in samples) [default: 512]."/>
<Parameter name="ADVANCE" type="int" value="160" description="The shift length beween adjacent frames (in samples)[default: 160]."/>
<Parameter name="CHANNEL_COUNT" type="int" value="8" description="The number of channels."/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate (Hz) [default: 16000]."/>
<Parameter name="DEVICETYPE" type="string" value="ALSA" description="Device type [default: WS]."/>
<Parameter name="GAIN" type="string" value="0dB" description="capture gain (dB) [default: 0dB]."/>
<Parameter name="DEVICE" type="string" value="plughw:1,0" description="Device name or IP address [default: 127.0.0.1]"/>
</Node>
<Node name="node_RosNodeGenerator_1" type="RosNodeGenerator" x="300" y="410">
<Parameter name="NODE_NAME" type="string" value="HARK_MASTER_NODE" description="Node name for ROS"/>
</Node>
<Node name="node_RosHarkMsgsPublisher_1" type="RosHarkMsgsPublisher" x="800" y="460">
<Parameter name="ADVANCE" type="int" value="160" description="Shift sample number for sliding spectrum analysis."/>
<Parameter name="ENABLE_DEBUG" type="bool" value="false" description="print debug message of this module in case of true."/>
<Parameter name="TOPIC_NAME_HARKWAVE" type="string" value="HarkWave" description="Published topic name for ROS (HarkWave type message)"/>
<Parameter name="TOPIC_NAME_HARKFFT" type="string" value="HarkFFT" description="Published topic name for ROS (HarkFFT type message)"/>
<Parameter name="TOPIC_NAME_HARKFEATURE" type="string" value="HarkFeature" description="Published topic name for ROS (HarkFeature type message)"/>
<Parameter name="TOPIC_NAME_HARKSOURCE" type="string" value="HarkSource" description="Published topic name for ROS (HarkSource type message)"/>
<Parameter name="TOPIC_NAME_HARKSRCWAVE" type="string" value="HarkSrcWave" description="Published topic name for ROS (HarkSrcWave type message)"/>
<Parameter name="TOPIC_NAME_HARKSRCFFT" type="string" value="HarkSrcFFT" description="Published topic name for ROS (HarkSrcFFT type message)"/>
<Parameter name="TOPIC_NAME_HARKSRCFEATURE" type="string" value="HarkSrcFeature" description="Published topic name for ROS (HarkSrcFeature type message)"/>
<Parameter name="TOPIC_NAME_HARKSRCFEATUREMFM" type="string" value="HarkSrcFeatureMFM" description="Published topic name for ROS (HarkSrcFeatureMFM type message)"/>
<Parameter name="BUFFER_NUM" type="int" value="100" description="Buffer size for a ROS published message"/>
<Parameter name="ROS_LOOP_RATE" type="float" value="100000" description="This allows you to specify a frequency that you would like to loop at [Hz]. Keep this value large. (If ROS interval is shorter than HARK interval, ROS interval is overwritten.)"/>
<Parameter name="TIMESTAMP_TYPE" type="string" value="ROS_TIME_NOW" description="Time stamp type. If TIMESTAMP is connected, this is ignored."/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="The time increment is caluculated as ADVANCE / SAMPLING_RATE"/>
<Parameter name="ROS_FRAME_ID" type="string" value="HarkRosFrameID" description="ROS frame_id of the message header"/>
</Node>
<Node name="node_CMMakerFromFFT_1" type="CMMakerFromFFT" x="290" y="280">
<Parameter name="WINDOW" type="int" value="50" description="The number of frames used for calculating a correlation function."/>
<Parameter name="PERIOD" type="int" value="50" description="The period in which the output correlation matrix is renewed."/>
<Parameter name="WINDOW_TYPE" type="string" value="FUTURE" description="Window selection to accumulate a correlation function. If PAST, the past WINDOW frames from the current frame are used for the accumulation. If MIDDLE, the current frame will be the middle of the accumulated frames. If FUTURE, the future WINDOW frames from the current frame are used for the accumulation. FUTURE is the default from version 1.0, but this makes a delay since we have to wait for the future information. PAST generates a internal buffers for the accumulation, which realizes no delay for localization."/>
<Parameter name="ENABLE_DEBUG" type="bool" value="false" description="enable debug print"/>
</Node>
<Link from="node_MultiFFT_1" output="OUTPUT" to="node_sub_separation_1" input="SPEC"/>
<Link from="node_sub_localization_1" output="OUTPUT" to="node_sub_separation_1" input="SRC_INFO"/>
<Link from="node_sub_localization_1" output="OUTPUT" to="node_sub_recognition_1" input="SRC_INFO"/>
<Link from="node_AudioStreamFromMic_1" output="AUDIO" to="node_MultiFFT_1" input="INPUT"/>
<Link from="node_Synthesize_1" output="OUTPUT" to="node_SaveWavePCM_1" input="INPUT"/>
<Link from="node_sub_separation_1" output="OUTPUT" to="node_Synthesize_1" input="INPUT"/>
<Link from="node_sub_separation_1" output="OUTPUT" to="node_sub_recognition_1" input="SPEC"/>
<Link from="node_MultiFFT_1" output="OUTPUT" to="node_sub_localization_1" input="WAV"/>
<Link from="node_sub_localization_1" output="OUTPUT" to="node_RosHarkMsgsPublisher_1" input="SRC_INFO"/>
<Link from="node_MultiFFT_1" output="OUTPUT" to="node_CMMakerFromFFT_1" input="INPUT"/>
<Link from="node_Synthesize_1" output="OUTPUT" to="node_RosHarkMsgsPublisher_1" input="SRC_WAVE"/>
<NetOutput name="SaveWavePCM" node="node_SaveWavePCM_1" terminal="OUTPUT" object_type="Map&lt;int,ObjectRef&gt;" description="The same as input."/>
<NetOutput name="ASR" node="node_sub_recognition_1" terminal="ASR" object_type="any" description="Dynamic"/>
<NetOutput name="VALUE" node="node_RosNodeGenerator_1" terminal="VALUE" object_type="int" description="int parameter"/>
<NetOutput name="OUTPUT_1" node="node_RosHarkMsgsPublisher_1" terminal="OUTPUT" object_type="ObjectRef" description="This is a dummy output, and it has no mean. Only for an activation of this module."/>
<NetCondition name="CONDITION" node="node_AudioStreamFromMic_1" terminal="NOT_EOF"/>
</Network>
<!--
  sub_separation: subnet with inputs SPEC and SRC_INFO (both wired to
  node_GHDSS_1) and outputs OUTPUT and VOICE_PROB.

  Wiring, as given by the Link elements below:
    GHDSS.OUTPUT             to PowerCalcForMap.INPUT and
                                SpectralGainFilter.INPUT_SPEC
    PowerCalcForMap.OUTPUT   to HRLE.INPUT_SPEC, EstimateLeak.INPUT_POWER_SPEC,
                                CalcSpecSubGain.INPUT_POWER_SPEC
    HRLE.NOISE_SPEC and EstimateLeak.LEAK_POWER_SPEC
                             to CalcSpecAddPower (INPUT_POWER_SPEC1 / 2)
    CalcSpecAddPower.OUTPUT_POWER_SPEC
                             to CalcSpecSubGain.NOISE_SPEC
    CalcSpecSubGain.GAIN and .VOICE_PROB
                             to SpectralGainFilter
    SpectralGainFilter.OUTPUT_SPEC is exported as OUTPUT.

  TF_CONJ_FILENAME on node_GHDSS_1 is an absolute path under /home/vlion and
  must exist on the machine that runs this network.
  Quotes and angle brackets inside attribute values are written as entities
  (&quot; &lt; &gt;) so the file stays well-formed XML; parsers see the same
  values.
-->
<Network type="subnet" name="sub_separation">
<Node name="node_PowerCalcForMap_1" type="PowerCalcForMap" x="100" y="340">
<Parameter name="POWER_TYPE" type="string" value="POW" description="Measure for computing the POW or MAG (i.e. power or magnitude) of the complex spectrum [default: POW]"/>
</Node>
<Node name="node_CalcSpecAddPower_1" type="CalcSpecAddPower" x="600" y="340">
</Node>
<Node name="node_CalcSpecSubGain_1" type="CalcSpecSubGain" x="550" y="170">
<Parameter name="ALPHA" type="float" value="1.0" description="Overestimation factor."/>
<Parameter name="BETA" type="float" value="0.0" description="Spectral floor."/>
<Parameter name="SS_METHOD" type="int" value="2" description="1: Magnitude Spectral Subtraction, 2: Power SS"/>
</Node>
<Node name="node_EstimateLeak_1" type="EstimateLeak" x="240" y="440">
<Parameter name="LEAK_FACTOR" type="float" value="0.25" description="Leak factor [default:0.25]"/>
<Parameter name="OVER_CANCEL_FACTOR" type="float" value="1" description="Over cancel value [default:1]"/>
</Node>
<Node name="node_HRLE_1" type="HRLE" x="340" y="340">
<Parameter name="LX" type="float" value="0.20" description="Lx value of estimation, e.g. Lx=0 -&gt; Minimum (MCRA), Lx=0.5 -&gt; Median , Lx=1.0 -&gt; Maximum [default:0.85]"/>
<Parameter name="TIME_CONST_METHOD" type="string" value="LEGACY" description="Time constant value definition, &quot;LEGACY&quot; uses time constant value for HARK 2.0.0,&quot;MILLISECOND&quot; uses time constant value in frames. [default: LEGACY]"/>
<Parameter name="TIME_CONSTANT" type="float" value="16000" description="Time constant for exponential decay window in samples [default:]"/>
<Parameter name="DECAY_FACTOR" type="int" value="1000" description="Time constant for exponential decay window in millisecond [default:100]"/>
<Parameter name="ADVANCE" type="int" value="160" description="The length in sample between a frame and a previous frame. [default: 160]"/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate (Hz) [default: 16000]."/>
<Parameter name="NUM_BIN" type="float" value="2000" description="Number of histogram bins [default:1000]"/>
<Parameter name="MIN_LEVEL" type="float" value="-200" description="Minimum level of histogram bin in dB [default:-100]"/>
<Parameter name="STEP_LEVEL" type="float" value="0.2" description="Step level of histogram bin (Width of each histogram bin) in dB [default:0.2]"/>
<Parameter name="DEBUG" type="bool" value="false" description="Prints the histogram for each 100 iterations."/>
</Node>
<Node name="node_SpectralGainFilter_1" type="SpectralGainFilter" x="1020" y="100">
</Node>
<Node name="node_GHDSS_1" type="GHDSS" x="100" y="100">
<Parameter name="LENGTH" type="int" value="512" description="The frame length of each channel (in samples) [default: 512]."/>
<Parameter name="ADVANCE" type="int" value="160" description="The shift length beween adjacent frames (in samples)[default: 160]."/>
<Parameter name="SAMPLING_RATE" type="int" value="16000" description="Sampling rate (Hz) [default: 16000]."/>
<Parameter name="LOWER_BOUND_FREQUENCY" type="int" value="0" description="Lower bound of frequency (Hz). [default: 0]"/>
<Parameter name="UPPER_BOUND_FREQUENCY" type="int" value="8000" description="Upper bound of frequency (Hz). [default: 8000]"/>
<Parameter name="TF_CONJ_FILENAME" type="string" value="/home/vlion/r/tamago_rectf.zip" description="Filename of a pre-measured transfer function for separation."/>
<Parameter name="INITW_FILENAME" type="string" value="" description="Filename of an initial separation matrix. If specified, a matrix in INITW_FILENAME is used as an initial separation matrix. Otherwise, initial separation matrix is estimated from the geometrical relationship or pre-measured TF according to TF_CONJ."/>
<Parameter name="SS_METHOD" type="string" value="ADAPTIVE" description="The calculation method for SS step size parameter corresponding to the blind separation part. &quot;FIX&quot; uses a fixed step size,&quot;LC_MYU&quot; uses the same value as LC_MYU, and &quot;ADAPTIVE&quot; adaptively estimates an optimal step size. [default: ADAPTIVE]"/>
<Parameter name="SS_SCAL" type="float" value="1.0" description="Scaling factor for SS step size. [default: 1.0]"/>
<Parameter name="SS_MYU" type="float" value="0.001" description="SS step size value. [default 0.001]"/>
<Parameter name="NOISE_FLOOR" type="float" value="0.0" description="Noise floor value. [default 0.0]"/>
<Parameter name="LC_CONST" type="string" value="DIAG" description="The calculation method for geometric constraints. &quot;FULL&quot; uses all elements of a matrix, and &quot;DIAG&quot; only uses diagonal parts. [default: FULL]"/>
<Parameter name="LC_METHOD" type="string" value="ADAPTIVE" description="The calculation method for LC step size corresponding to geometric constraints. &quot;FIX&quot; uses a fixed value, and &quot;Adaptive&quot; adaptively estimates an optimal step size. [default: ADAPTIVE]"/>
<Parameter name="LC_MYU" type="float" value="0.001" description="LC step size value. [default 0.001]"/>
<Parameter name="UPDATE_METHOD_TF_CONJ" type="string" value="POS" description="Switching method of TF_CONJ data. [default: POS]"/>
<Parameter name="UPDATE_METHOD_W" type="string" value="ID" description="Switching method of separation matrix, W. [default: ID]"/>
<Parameter name="UPDATE_ACCEPT_DISTANCE" type="float" value="300" description="Distance allowance to switch separation matrix in [mm]. available when when UPDATE_METHOD_W is POS or ID_POS. [default: 300.0]"/>
<Parameter name="EXPORT_W" type="bool" value="false" description="Separation matrix W is exported if true. [default: false]"/>
<Parameter name="EXPORT_W_FILENAME" type="string" value="" description="The filename to export W."/>
<Parameter name="UPDATE" type="string" value="STEP" description="The update method of separation matrix. &quot;STEP&quot; updates W sequentially, i.e., based on SS and then on LC cost. &quot;TOTAL&quot; updates W based on an integrated value of SS and LC cost [default: STEP]"/>
</Node>
<Link from="node_HRLE_1" output="NOISE_SPEC" to="node_CalcSpecAddPower_1" input="INPUT_POWER_SPEC1"/>
<Link from="node_EstimateLeak_1" output="LEAK_POWER_SPEC" to="node_CalcSpecAddPower_1" input="INPUT_POWER_SPEC2"/>
<Link from="node_PowerCalcForMap_1" output="OUTPUT" to="node_CalcSpecSubGain_1" input="INPUT_POWER_SPEC"/>
<Link from="node_CalcSpecAddPower_1" output="OUTPUT_POWER_SPEC" to="node_CalcSpecSubGain_1" input="NOISE_SPEC"/>
<Link from="node_PowerCalcForMap_1" output="OUTPUT" to="node_EstimateLeak_1" input="INPUT_POWER_SPEC"/>
<Link from="node_PowerCalcForMap_1" output="OUTPUT" to="node_HRLE_1" input="INPUT_SPEC"/>
<Link from="node_CalcSpecSubGain_1" output="VOICE_PROB" to="node_SpectralGainFilter_1" input="VOICE_PROB"/>
<Link from="node_CalcSpecSubGain_1" output="GAIN" to="node_SpectralGainFilter_1" input="GAIN"/>
<Link from="node_GHDSS_1" output="OUTPUT" to="node_PowerCalcForMap_1" input="INPUT"/>
<Link from="node_GHDSS_1" output="OUTPUT" to="node_SpectralGainFilter_1" input="INPUT_SPEC"/>
<NetInput name="SPEC" node="node_GHDSS_1" terminal="INPUT_FRAMES" object_type="Matrix&lt;complex&lt;float&gt; &gt;" description="Input multi-channel spectrum. A row is a channel, and a column is a spectrum for the corresponding channel."/>
<NetInput name="SRC_INFO" node="node_GHDSS_1" terminal="INPUT_SOURCES" object_type="Vector&lt;ObjectRef&gt;" description="Source locations with ID. Each element of the vector is a source location with ID specified by &quot;Source&quot;."/>
<NetOutput name="OUTPUT" node="node_SpectralGainFilter_1" terminal="OUTPUT_SPEC" object_type="Map&lt;int,ObjectRef&gt;" description="Estimated voice spectrum(Vector&lt;complex&lt;float&gt; &gt;) with a key(source ID)."/>
<NetOutput name="VOICE_PROB" node="node_CalcSpecSubGain_1" terminal="VOICE_PROB" object_type="Map&lt;int,ObjectRef&gt;" description="Probability of voice existence(Vector&lt;float&gt;) with a key(source ID)."/>
</Network>
<!--
  sub_recognition: subnet with inputs SPEC (to WhiteNoiseAdder.INPUT) and
  SRC_INFO (to SpeechRecognitionSMNClient.SOURCES) and output ASR
  (SpeechRecognitionSMNClient.OUTPUT).

  Chain, as given by the Link elements below:
    WhiteNoiseAdder, PreEmphasis, MelFilterBank, MSLSExtraction, Delta,
    FeatureRemover, SpeechRecognitionSMNClient.
  MSLSExtraction additionally takes PreEmphasis.OUTPUT on its SPECTRUM input,
  and FeatureRemover.OUTPUT is linked to both FEATURES and MASKS of the client.

  Values read from the caller via subnet_param: LENGTH, SAMPLING_RATE,
  FBANK_COUNT.
  NOTE(review): node_Delta_1 uses a literal FBANK_COUNT of 14 rather than the
  subnet parameter; presumably FBANK_COUNT plus one power term (USE_POWER is
  true). Confirm, and keep it in sync if the caller changes FBANK_COUNT.
  Quotes and angle brackets inside attribute values are written as entities
  (&quot; &lt; &gt;) so the file stays well-formed XML; parsers see the same
  values.
-->
<Network type="subnet" name="sub_recognition">
<Node name="node_Delta_1" type="Delta" x="960" y="100">
<Parameter name="FBANK_COUNT" type="int" value="14" description="The size of the input feature vector."/>
</Node>
<Node name="node_FeatureRemover_1" type="FeatureRemover" x="1140" y="100">
<Parameter name="SELECTOR" type="object" value="&lt;Vector&lt;int&gt; 13&gt;" description="Component indices in a feature vector to remove. E.g. &lt;Vector&lt;int&gt; 13&gt; to remove 14th comopnent (The index start with 0)."/>
</Node>
<Node name="node_MSLSExtraction_1" type="MSLSExtraction" x="740" y="100">
<Parameter name="FBANK_COUNT" type="subnet_param" value="FBANK_COUNT" description="Size of the static part of MSLS feature vector. [default: 13]"/>
<Parameter name="NORMALIZATION_MODE" type="string" value="SPECTRAL" description="The domain to perform normalization. CEPSTRAL or SPECTRAL. [default: CEPSTRAL]"/>
<Parameter name="USE_POWER" type="bool" value="true" description="Use power feature if true. [default: false]"/>
</Node>
<Node name="node_MelFilterBank_1" type="MelFilterBank" x="530" y="100">
<Parameter name="LENGTH" type="subnet_param" value="LENGTH" description="Size of window length in sample. [default: 512]"/>
<Parameter name="SAMPLING_RATE" type="subnet_param" value="SAMPLING_RATE" description="Sampling rate in Hz. [default: 16000]"/>
<Parameter name="CUTOFF" type="int" value="8000" description="Cutoff frequency in Hz. Mel-filterbanks are placed between 0 Hz and CUTOFF Hz. [default: 8000]"/>
<Parameter name="MIN_FREQUENCY" type="int" value="63" description="Minimum frequency (Hz) [default: 63]"/>
<Parameter name="MAX_FREQUENCY" type="int" value="8000" description="Maximum frequency (Hz) [default: 8000]"/>
<Parameter name="FBANK_COUNT" type="subnet_param" value="FBANK_COUNT" description="The number of Mel filter banks. [default: 13]"/>
</Node>
<Node name="node_PreEmphasis_1" type="PreEmphasis" x="330" y="100">
<Parameter name="LENGTH" type="subnet_param" value="LENGTH" description="window length in sample [default: 512]"/>
<Parameter name="SAMPLING_RATE" type="subnet_param" value="SAMPLING_RATE" description="Sampling rate in Hz [default: 16000]"/>
<Parameter name="PREEMCOEF" type="float" value="0.97" description="pre-emphasis coefficient [default: 0.97]"/>
<Parameter name="INPUT_TYPE" type="string" value="SPECTRUM" description="The domain to perform pre-emphasis [default: WAV]"/>
</Node>
<Node name="node_WhiteNoiseAdder_1" type="WhiteNoiseAdder" x="100" y="100">
<Parameter name="LENGTH" type="subnet_param" value="LENGTH" description="Size of window length in sample. [default: 512]"/>
<Parameter name="WN_LEVEL" type="float" value="1" description="An amplitude of white noise to be added. [default: 0]"/>
</Node>
<Node name="node_SpeechRecognitionSMNClient_1" type="SpeechRecognitionSMNClient" x="1360" y="100">
<Parameter name="MFM_ENABLED" type="bool" value="false" description="MFM is enbaled if true. [default: true]"/>
<Parameter name="HOST" type="string" value="127.0.0.1" description="Hostname or IP of Julius/Julian server. [default: 127.0.0.1]"/>
<Parameter name="PORT" type="int" value="5530" description="Port number of Julius/Julian server. [default: 5530]"/>
<Parameter name="SOCKET_ENABLED" type="bool" value="true" description="send data via socket if true. [default: true]"/>
</Node>
<Link from="node_MelFilterBank_1" output="OUTPUT" to="node_MSLSExtraction_1" input="FBANK"/>
<Link from="node_MSLSExtraction_1" output="OUTPUT" to="node_Delta_1" input="INPUT"/>
<Link from="node_Delta_1" output="OUTPUT" to="node_FeatureRemover_1" input="INPUT"/>
<Link from="node_PreEmphasis_1" output="OUTPUT" to="node_MelFilterBank_1" input="INPUT"/>
<Link from="node_PreEmphasis_1" output="OUTPUT" to="node_MSLSExtraction_1" input="SPECTRUM"/>
<Link from="node_WhiteNoiseAdder_1" output="OUTPUT" to="node_PreEmphasis_1" input="INPUT"/>
<Link from="node_FeatureRemover_1" output="OUTPUT" to="node_SpeechRecognitionSMNClient_1" input="FEATURES"/>
<Link from="node_FeatureRemover_1" output="OUTPUT" to="node_SpeechRecognitionSMNClient_1" input="MASKS"/>
<NetInput name="SPEC" node="node_WhiteNoiseAdder_1" terminal="INPUT" object_type="Map&lt;int,ObjectRef&gt;" description="Input spectrum. The key is source ID, and the value is a spectrum (Vector&lt;complex&lt;float&gt; &gt;)."/>
<NetInput name="SRC_INFO" node="node_SpeechRecognitionSMNClient_1" terminal="SOURCES" object_type="Vector&lt;ObjectRef&gt;" description="Source locations with ID. Each element of the vector is a source location with ID specified by &quot;Source&quot;."/>
<NetOutput name="ASR" node="node_SpeechRecognitionSMNClient_1" terminal="OUTPUT" object_type="Vector&lt;ObjectRef&gt;" description="The same as SOURCES."/>
</Network>
<!--
  sub_localization: subnet with input WAV (to LocalizeMUSIC.INPUT) and outputs
  OUTPUT (DisplayLocalization.OUTPUT) and SPECTRUM (LocalizeMUSIC.SPECTRUM).

  Chain, as given by the Link elements below:
    LocalizeMUSIC, SourceTracker, SourceIntervalExtender, DisplayLocalization.

  LENGTH and SAMPLING_RATE on node_LocalizeMUSIC_1 come from the caller via
  subnet_param. A_MATRIX is an absolute path under /home/vlion and must exist
  on the machine that runs this network. DEBUG is set to true on
  node_LocalizeMUSIC_1.
  Quotes and angle brackets inside attribute values are written as entities
  (&quot; &lt; &gt;) so the file stays well-formed XML; parsers see the same
  values.
-->
<Network type="subnet" name="sub_localization">
<Node name="node_LocalizeMUSIC_1" type="LocalizeMUSIC" x="100" y="110">
<Parameter name="MUSIC_ALGORITHM" type="string" value="SEVD" description="Sound Source Localization Algorithm. If SEVD, NOISECM will be ignored"/>
<Parameter name="TF_CHANNEL_SELECTION" type="object" value="&lt;Vector&lt;int&gt; 0 1 2 3 4 5 6 7&gt;" description="Microphone channels for localization. If vacant, all channels will be used."/>
<Parameter name="LENGTH" type="subnet_param" value="LENGTH" description="The length of a frame (per channel)."/>
<Parameter name="SAMPLING_RATE" type="subnet_param" value="SAMPLING_RATE" description="Sampling Rate (Hz)."/>
<Parameter name="A_MATRIX" type="string" value="/home/vlion/r/tamago_rectf.zip" description="Filename of a transfer function matrix."/>
<Parameter name="WINDOW" type="int" value="50" description="The number of frames used for calculating a correlation function."/>
<Parameter name="WINDOW_TYPE" type="string" value="PAST" description="Window selection to accumulate a correlation function. If PAST, the past WINDOW frames from the current frame are used for the accumulation. If MIDDLE, the current frame will be the middle of the accumulated frames. If FUTURE, the future WINDOW frames from the current frame are used for the accumulation. FUTURE is the default from version 1.0, but this makes a delay since we have to wait for the future information. PAST generates a internal buffers for the accumulation, which realizes no delay for localization."/>
<Parameter name="PERIOD" type="int" value="50" description="The period in which the source localization is processed."/>
<Parameter name="NUM_SOURCE" type="int" value="1" description="Number of sources, which should be less than number of channels."/>
<Parameter name="MIN_DEG" type="int" value="-180" description="source direction (lower)."/>
<Parameter name="MAX_DEG" type="int" value="180" description="source direction (higher)."/>
<Parameter name="LOWER_BOUND_FREQUENCY" type="int" value="1000" description="Lower bound of frequency (Hz) used for correlation function calculation."/>
<Parameter name="UPPER_BOUND_FREQUENCY" type="int" value="3400" description="Upper bound of frequency (Hz) used for correlation function calculation."/>
<Parameter name="SPECTRUM_WEIGHT_TYPE" type="string" value="A_Characteristic" description="MUSIC spectrum weight for each frequency bin."/>
<Parameter name="A_CHAR_SCALING" type="float" value="1.0" description="Scaling factor of the A-Weight with respect to frequency"/>
<Parameter name="MANUAL_WEIGHT_SPLINE" type="object" value="&lt;Matrix&lt;float&gt; &lt;rows 2&gt; &lt;cols 5&gt; &lt;data 0.0 2000.0 4000.0 6000.0 8000.0 1.0 1.0 1.0 1.0 1.0&gt; &gt;" description="MUSIC spectrum weight for each frequency bin. This is a 2 by M matrix. The first row represents the frequency, and the second row represents the weight gain. &quot;M&quot; represents the number of key points for the spectrum weight. The frequency range between M key points will be interpolated by spline manner. The format is &quot;&lt;Matrix&lt;float&gt; &lt;rows 2&gt; &lt;cols 2&gt; &lt;data 1 2 3 4&gt; &gt;&quot;."/>
<Parameter name="MANUAL_WEIGHT_SQUARE" type="object" value="&lt;Vector&lt;float&gt; 0.0 2000.0 4000.0 6000.0 8000.0&gt;" description="MUSIC spectrum weight for each frequency bin. This is a M order vector. The element represents the frequency points for the square wave. &quot;M&quot; represents the number of key points for the square wave weight. The format is &quot;&lt;Vector&lt;float&gt; 1 2 3 4&gt;&quot;."/>
<Parameter name="ENABLE_EIGENVALUE_WEIGHT" type="bool" value="false" description="If true, the spatial spectrum is weighted depending on the eigenvalues of a correlation matrix. We do not suggest to use this function with GEVD and GSVD, because the NOISECM changes the eigenvalue drastically. Only useful for SEVD."/>
<Parameter name="MAXNUM_OUT_PEAKS" type="int" value="-1" description="Maximum number of output peaks. If MAXNUM_OUT_PEAKS = NUM_SOURCE, this is compatible with HARK version 1.0. If MAXNUM_OUT_PEAKS = 0, all local maxima are output. If MAXNUM_OUT_PEAKS &lt; 0, MAXNUM_OUT_PEAKS is set to NUM_SOURCE. If MAXNUM_OUT_PEAKS &gt; 0, number of output peaks is limited to MAXNUM_OUT_PEAKS."/>
<Parameter name="DEBUG" type="bool" value="true" description="Debug option. If the parameter is true, this node outputs sound localization results to a standard output."/>
</Node>
<Node name="node_SourceTracker_1" type="SourceTracker" x="390" y="100">
<Parameter name="THRESH" type="float" value="20" description="Power threshold for localization results. A localization result with higher power than THRESH is tracked, otherwise ignored."/>
<Parameter name="PAUSE_LENGTH" type="float" value="500" description="Life duration of source in ms. When any localization result for a source is found for more than PAUSE_LENGTH / 10 iterations, the source is terminated. [default: 800]"/>
<Parameter name="MIN_SRC_INTERVAL" type="float" value="20" description="Source interval threshold in degree. When the angle between a localization result and a source is smaller than MIN_SRC_INTERVAL, the same ID is given to the localization result. [default: 20]"/>
<Parameter name="MIN_ID" type="int" value="0" description="Minimum ID of source locations. MIN_ID should be greater than 0 or equal."/>
<Parameter name="DEBUG" type="bool" value="false" description="Output debug information if true [default: false]"/>
</Node>
<Node name="node_DisplayLocalization_1" type="DisplayLocalization" x="1080" y="100">
<Parameter name="WINDOW_NAME" type="string" value="Source Location" description="Window name of the time-azimuth map [default: Window name]"/>
<Parameter name="WINDOW_LENGTH" type="int" value="1000" description="Window length to show at the same time [sample]"/>
<Parameter name="VERTICAL_RANGE" type="object" value="&lt;Vector&lt;int&gt; -180 180&gt;" description="Plot range of the vertical axis"/>
<Parameter name="PLOT_TYPE" type="string" value="AZIMUTH" description="Coordinate setting for the plotting"/>
</Node>
<Node name="node_SourceIntervalExtender_1" type="SourceIntervalExtender" x="650" y="100">
<Parameter name="PREROLL_LENGTH" type="int" value="80" description="Preroll length in frame. [default: 50]"/>
</Node>
<Link from="node_SourceTracker_1" output="OUTPUT" to="node_SourceIntervalExtender_1" input="SOURCES"/>
<Link from="node_SourceIntervalExtender_1" output="OUTPUT" to="node_DisplayLocalization_1" input="SOURCES"/>
<Link from="node_LocalizeMUSIC_1" output="OUTPUT" to="node_SourceTracker_1" input="INPUT"/>
<NetInput name="WAV" node="node_LocalizeMUSIC_1" terminal="INPUT" object_type="Matrix&lt;complex&lt;float&gt;&gt;" description="Multi-channel audio signals. In this matrix, a row is a channel, and a column is a sample."/>
<NetOutput name="OUTPUT" node="node_DisplayLocalization_1" terminal="OUTPUT" object_type="Vector&lt;ObjectRef&gt;" description="The same as input."/>
<NetOutput name="SPECTRUM" node="node_LocalizeMUSIC_1" terminal="SPECTRUM" object_type="any" description="Dynamically added outputs"/>
</Network>
</Document>