-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmapAdapt.m
149 lines (132 loc) · 4.81 KB
/
mapAdapt.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
function gmm = mapAdapt(dataList, ubmFilename, tau, config, gmmFilename)
% MAP-adapts a speaker-specific GMM from a UBM using the features in
% dataList, following the relevance-MAP scheme of [1]. Means, covariances
% and weights can each be adapted, in any combination.
%
% Inputs:
%   - dataList    : name of an ASCII file listing adaptation feature files,
%                   a cell array of feature file names, or a cell array of
%                   feature matrices (ndim x nframes each). Feature files
%                   must be in uncompressed HTK format.
%   - ubmFilename : file name of the UBM (a MAT-file holding a variable
%                   named gmm) or a structure with the UBM hyperparameters
%                   (ubm.mu: means, ubm.sigma: covariances, ubm.w: weights)
%   - tau         : the MAP adaptation relevance factor, numeric or a
%                   string holding a number (default: 19.0)
%   - config      : any combination of the characters 'm', 'v', 'w' to
%                   adapt mixture means, covariances and weights
%                   (default: 'm')
%   - gmmFilename : the output speaker-specific GMM file name (optional);
%                   its directory is created when missing
%
% Outputs:
%   - gmm         : a structure containing the GMM hyperparameters
%                   (gmm.mu: means, gmm.sigma: covariances, gmm.w: weights).
%                   Fields that are not adapted are copied from the UBM.
%
% Notes:
%   - The adapted covariance is taken about the mean stored in the output
%     model: the adapted mean when 'm' is in config, the UBM mean otherwise.
%   - With no adaptation data the output equals the UBM (for tau > 0).
%
% References:
%   [1] D.A. Reynolds, T.F. Quatieri, and R.B. Dunn, "Speaker verification
%       using adapted Gaussian mixture models," Digital Signal Process.,
%       vol. 10, pp. 19-41, Jan. 2000.
%
%
% Omid Sadjadi <s.omid.sadjadi@gmail.com>
% Microsoft Research, Conversational Systems Research Center

% ---- defaults and argument checks ------------------------------------
if nargin < 3 || isempty(tau), tau = 19.0; end
if nargin < 4 || isempty(config), config = 'm'; end
if ischar(tau), tau = str2double(tau); end
if any(isnan(tau(:)))
    % str2double returns NaN for a non-numeric string; fail here instead
    % of silently producing an all-NaN model
    error('mapAdapt:invalidTau', ...
        'tau must be numeric or a string holding a number.');
end

% ---- load the UBM ------------------------------------------------------
if ischar(ubmFilename)
    tmp = load(ubmFilename);
    if ~isfield(tmp, 'gmm')
        error('mapAdapt:invalidUbmFile', ...
            'UBM file %s does not contain a variable named gmm.', ubmFilename);
    end
    ubm = tmp.gmm;
elseif isstruct(ubmFilename)
    ubm = ubmFilename;
else
    error('oh dear! ubmFilename should be either a string or a structure!');
end
gmm = ubm; % anything not adapted below is inherited from the UBM

% ---- load the adaptation data -----------------------------------------
if ischar(dataList) || iscellstr(dataList)
    dataList = load_data(dataList);
end
if ~iscell(dataList)
    error('Oops! dataList should be a cell array!');
end

% ---- accumulate sufficient statistics over all files ------------------
% N: 1 x nmix occupancies, F: ndim x nmix first-order sums,
% S: ndim x nmix second-order sums (parfor reduction variables)
nfiles = length(dataList);
N = 0; F = 0; S = 0;
parfor file = 1 : nfiles
    [n, f, s] = expectation(dataList{file}, ubm);
    N = N + n; F = F + f; S = S + s;
end

% Data-dependent trade-off between the ML estimate and the UBM prior.
alpha = N ./ (N + tau);

if any(config == 'm')
    % alpha .* (F ./ N) simplifies to F ./ (N + tau), which stays finite
    % when a mixture receives zero occupancy (F ./ N would be 0/0 = NaN).
    gmm.mu = bsxfun(@times, ubm.mu, (1 - alpha)) + bsxfun(@rdivide, F, N + tau);
end

if any(config == 'v')
    % Same simplification for the second-order term: alpha .* (S ./ N).
    % The mean subtracted is the one the output model actually carries, so
    % this branch also works when 'm' is not part of config.
    gmm.sigma = bsxfun(@times, (ubm.sigma + ubm.mu.^2), (1 - alpha)) ...
        + bsxfun(@rdivide, S, N + tau) - (gmm.mu .* gmm.mu);
end

if any(config == 'w')
    total = sum(N);
    if total > 0
        w_ML = N / total;
    else
        w_ML = zeros(size(N)); % no data: alpha is 0, so the UBM weights win
    end
    w0 = ubm.w(:)'; % row vector, to line up with the occupancy vector N
    w = bsxfun(@times, w0, (1 - alpha)) + bsxfun(@times, w_ML, alpha);
    w = w / sum(w); % renormalise so the weights sum to one
    gmm.w = reshape(w, size(ubm.w)); % keep the UBM's weight orientation
end

% ---- optionally save the adapted model --------------------------------
if nargin == 5 && ~isempty(gmmFilename)
    % create the output directory if it does not exist and save the file
    outDir = fileparts(gmmFilename);
    if ~isempty(outDir) && exist(outDir, 'dir') ~= 7
        mkdir(outDir);
    end
    save(gmmFilename, 'gmm');
end
function data = load_data(datalist)
% Loads every HTK feature file named in datalist into memory.
%
% datalist is either a cell array of file names (row or column) or the
% name of an ASCII list file holding whitespace-separated file names.
% Returns an nfiles x 1 cell array with one feature matrix per file, in
% the order the names were given.
if iscellstr(datalist)
    filenames = datalist(:); % column, so row-shaped inputs are fully read
else
    fid = fopen(datalist, 'rt');
    if fid == -1
        error('load_data:fileOpen', 'Cannot open list file: %s', datalist);
    end
    filenames = textscan(fid, '%s'); % '%s' splits on any whitespace
    fclose(fid);
    filenames = filenames{1};
end
nfiles = numel(filenames);
data = cell(nfiles, 1);
for ix = 1 : nfiles
    data{ix} = htkread(filenames{ix});
end
function [N, F, S, llk] = expectation(data, gmm)
% Accumulates the sufficient statistics of data under the model gmm:
%   N   - per-mixture occupancy (posteriors summed over frames), row vector
%   F   - posterior-weighted sum of the features, one column per mixture
%   S   - posterior-weighted sum of the squared features
%   llk - per-frame log-likelihood as returned by postprob
[gamma, llk] = postprob(data, gmm.mu, gmm.sigma, gmm.w(:));
dataSq = data .* data;

N = sum(gamma, 2)';
F = data * gamma';
S = dataSq * gamma';
function [post, llk] = postprob(data, mu, sigma, w)
% Posterior probability of every mixture for every frame.
%   post - mixtures x frames matrix of posteriors (columns sum to one)
%   llk  - 1 x frames log-likelihood of each frame under the whole GMM
logJoint = lgmmprob(data, mu, sigma, w);

% normalise in the log domain before exponentiating
llk = logsumexp(logJoint, 1);
post = exp(bsxfun(@minus, logJoint, llk));
function logprob = lgmmprob(data, mu, sigma, w)
% Weighted log-density of every frame under every diagonal-covariance
% Gaussian of the mixture.
%
%   data  - ndim x nframes feature matrix
%   mu    - ndim x nmix mixture means
%   sigma - ndim x nmix diagonal covariances
%   w     - nmix mixture weights (any orientation)
%
% Returns logprob (nmix x nframes), where logprob(k, t) is
% log(w(k)) + log N(data(:, t); mu(:, k), diag(sigma(:, k))).
ndim = size(data, 1);

% Frame-independent part of the exponent. The sums run explicitly over the
% feature dimension: a bare sum() would collapse across mixtures instead
% whenever ndim == 1 (mu and sigma are then row vectors).
C = sum(mu .* mu ./ sigma, 1) + sum(log(sigma), 1);

% Frame-dependent part, expanded from (x - mu).^2 ./ sigma so it can be
% evaluated with two matrix products.
D = (1 ./ sigma)' * (data .* data) - 2 * (mu ./ sigma)' * data + ndim * log(2 * pi);

logprob = -0.5 * (bsxfun(@plus, C', D));

% w(:) forces a column so the weights line up with the mixture rows
logprob = bsxfun(@plus, logprob, log(w(:)));
function y = logsumexp(x, dim)
% Evaluates log(sum(exp(x), dim)) without overflow/underflow by factoring
% out the maximum along dim before exponentiating.
peak = max(x, [], dim);
shifted = bsxfun(@minus, x, peak);
y = peak + log(sum(exp(shifted), dim));

% Where the maximum is +/-Inf the shifted values are NaN; the result there
% is simply the maximum itself.
degenerate = ~isfinite(peak);
y(degenerate) = peak(degenerate);
function [data, frate, feakind] = htkread(filename)
% Reads an uncompressed, big-endian HTK feature file holding 4-byte floats.
%
%   data    - ndim x nframes feature matrix
%   frate   - sample period exactly as stored in the header
%             (the HTK file format specifies 100 ns units)
%   feakind - parameter-kind code from the header (9 is USER)
%
% Raises an error when the file cannot be opened, when the 12-byte header
% is incomplete, or when fewer feature values are present than the header
% announces.
fid = fopen(filename, 'rb', 'ieee-be');
if fid == -1
    error('htkread:fileOpen', 'Cannot open HTK feature file: %s', filename);
end
closer = onCleanup(@() fclose(fid)); % close the file even if a read fails

nframes = fread(fid, 1, 'int32'); % number of frames
frate = fread(fid, 1, 'int32');   % sample period
nbytes = fread(fid, 1, 'short');  % number of bytes per frame
feakind = fread(fid, 1, 'short'); % 9 is USER
if isempty(nframes) || isempty(frate) || isempty(nbytes) || isempty(feakind)
    error('htkread:badHeader', 'Incomplete HTK header in file: %s', filename);
end

ndim = nbytes / 4; % feature dimension (4 bytes per value)
[data, nread] = fread(fid, [ndim, nframes], 'float');
if nread ~= ndim * nframes
    % fread would otherwise return a short (or zero-padded) matrix silently
    error('htkread:truncated', ...
        'Expected %d feature values but read %d from file: %s', ...
        ndim * nframes, nread, filename);
end