-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathHMM.m
375 lines (304 loc) · 14.9 KB
/
HMM.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
classdef HMM < handle
% HMM
%
% Class that handles common computations for hidden Markov models.
% Required inputs: (name, value pairs)
% 'transition': a square matrix of transition probabilities between states
% in the model. matrix is N by N.
% 'emission': a function handle that, when evaluated on a single data
% values, returns an N-element array of emission probabilities of that
% observation from each state of the model.
% 'data': a time-series array of M data points.
%
% Built for data sets with tons of observations compared to the number of
% hidden states. Good for time-series data where there are many noisy
% observations of each hidden state, for example.
%
% Stephen Fleming
% 5/9/2018
properties
% these properties go with the HMM object
% data and model
% sigdata = []; % SignalData object associated with data file, or raw data
data = [];
T = []; % transition matrix
E = @(x) []; % emission probability generating function
init = []; % initial state occupancy probabilities
data_scaling = [1,0];
% generated quantities
viterbi_alignment = [];
end
methods % can be called directly by the user on a particular analysis object
% constructor
function obj = HMM(varargin)
% handle inputs
in = obj.parse_inputs(varargin{:});
% initialize the HMM object by filling in property values
obj.data = in.data;
obj.T = in.transition;
obj.E = in.emission;
obj.init = in.starting_prob;
end
function alignment = viterbi(obj)
% use the Viterbi algorithm to calculate the best possible
% alignment of the data to the model states, given the
% transition matrix
% use default initial probs if none given
if isempty(obj.init)
n = size(obj.T,1);
initial = exp(-(1:n)/5e-1)'/sum(exp(-(1:n)/5e-1));
if numel(initial)>n
initial = initial(1:n) / sum(initial(1:n)); % concatenate and fix to sum to 1
end
else
initial = obj.init;
end
loginit = log10(initial);
% initialize the big matrix 'dp' to trace path
% rows are all possible states in the model
% columns are the observations
% dimension 3 contains: prob, pointer i
dp = zeros(size(obj.T,1), numel(obj.data), 2);
% fill in first column, for first observation
dp(:,1,1) = loginit + log10(obj.E(obj.data(1)));
% precompute specified emission probs and store in matrix:
% this prevents tons of function calls and serves as a look up
% table, which is necessary for large datasets
% (limit the table to about 1 million points, for memory)
num = floor(1e6/size(obj.T,1)); % measurement resolution for emission calculation
range = [min(obj.scaled_data), max(obj.scaled_data)];
currents = linspace(range(1),range(2),num);
div = currents(2) - currents(1);
logEm = log10(obj.E(currents));
% fill in the big matrix
% each element (i,j,1) stores the log prob of the most likely path so far
% and a pointer (i,j,2) to the previous row index of that path
logT = log10(obj.T);
for j = 2:numel(obj.data) % columns are observations
% calculate the emission probs for (nearly) this current
data_ind = floor((obj.scaled_data(j) - range(1))/div) + 1;
for i = 1:size(obj.T,1) % rows are all possible states
% all possible previous states times transition to this state
% times emission for this state
[m, ind] = max( dp(:,j-1,1) + logT(:,i) + logEm(i,data_ind) ); % sum log probabilities
dp(i,j,1) = m; % the probability of the maximally probable path to here
dp(i,j,2) = ind; % row index
end
end
% get the most probable path by finding the last most probable state
[~,ind] = max(dp(:,end,1)); % state index
z = nan(numel(obj.data),1); % best path state indices
log_emissions = nan(numel(obj.data),1); % best path emission probs
z(end) = ind;
data_ind = floor((obj.scaled_data(j) - range(1))/div) + 1;
log_emissions(end) = logEm(z(end),data_ind);
% trace back through the big matrix to get the sequence of states
for j = numel(obj.data):-1:2
z(j-1) = dp(z(j),j,2); % pointer to the previous row index, i.e. state index
% trace through the emissions and record the emission probs
data_ind = floor((obj.scaled_data(j-1) - range(1))/div) + 1;
log_emissions(j-1) = logEm(z(j-1),data_ind);
end
obj.viterbi_alignment.states = z;
obj.viterbi_alignment.log_emissions = log_emissions;
alignment = obj.viterbi_alignment;
end
function alignment = EM_viterbi(obj, max_iter, tol)
% a scaling and offset between the model and data is unknown.
% iteratively (1) estimate scaling and offset, and (2)
% use the Viterbi algorithm to calculate the best possible
% alignment of the scaled data to the model states
if nargin < 2
max_iter = 20;
end
if nargin < 3
tol = 0.0001;
end
f = figure;
obj.data_scaling = obj.data_to_model_transformation(true);
for i = 1:max_iter
display(['EM Viterbi alignment: iteration ' num2str(i)])
obj.viterbi;
f = obj.plot_alignment(['Iteration ' num2str(i)], f);
drawnow;
prev = obj.data_scaling;
obj.data_scaling = obj.data_to_model_transformation;
delta = max((obj.data_scaling - prev)./prev);
if delta < tol
display('Tolerance achieved')
break;
end
end
alignment = obj.viterbi_alignment;
end
function f = plot_alignment(obj, plot_title, fig)
% plot alignment of data to model levels
if isempty(obj.viterbi_alignment)
display('Alignment has not yet been created.')
return;
end
model_means = obj.get_model_values;
if nargin<3
f = figure;
else
f = fig;
clf(f)
end
a = 0;
threshold = -6; % for "noise" alignment emission probability
noise_logic = obj.viterbi_alignment.log_emissions < threshold;
xx = 1:numel(obj.data);
c = colormap(lines(size(obj.T,1)));
for i = 1:size(obj.T,1)
state_logic = obj.viterbi_alignment.states==i;
plot(xx(state_logic & ~noise_logic),obj.data(state_logic & ~noise_logic),'color',c(i,:))
hold on
plot(xx(state_logic & noise_logic),obj.data(state_logic & noise_logic),'o','color',c(i,:))
if sum(obj.viterbi_alignment.states==i)>0
text(a+sum(obj.viterbi_alignment.states==i)/5, ...
(model_means(i)-obj.data_scaling(2))/obj.data_scaling(1)+15, num2str(i), 'fontsize',9)
end
a = a+sum(obj.viterbi_alignment.states==i);
end
ylabel('Data value')
xlabel('Time index')
if nargin<2
title('Viterbi alignment')
else
title(plot_title)
end
set(gca,'fontsize',14,'outerposition',[0.01,0.01,0.98,0.98],'looseinset',[0,0,0,0])
box on;
end
function plot_model(obj)
% plot the states of the HMM as a directed graphical model,
% with edges weighted by their transition probabilities
% create a directed graph object, using Matlab's built-ins
% where edges are the transition matrix elements
Tr = obj.T;
Tr(Tr<1e-4) = 0; % set small probability transitions to zero
ind = round(size(Tr,1)/2); % row index near the middle
inds = 1:size(Tr,1);
Tr2 = Tr;
Tr2(inds~=ind,:) = 0; % select only that one middle row
% plot full graph, with edges having thickness proportional to prob
figure
g = digraph(Tr);
f = plot(g);
f.NodeColor = [0 0 0];
f.MarkerSize = 8;
g.Edges.LWidths = 7*g.Edges.Weight/max(g.Edges.Weight);
f.LineWidth = g.Edges.LWidths;
title('Full model of state transitions')
% plot full graph, with edges having thickness proportional to prob
figure
g2 = digraph(Tr2);
f2 = plot(g2);
f2.NodeColor = [0 0 0];
f2.MarkerSize = 8;
g2.Edges.LWidths = 7*g2.Edges.Weight/max(g2.Edges.Weight);
f2.LineWidth = g2.Edges.LWidths;
title(['Transitions from state ' num2str(ind) ' in model'])
end
end
methods (Access = public) % only called by class methods
function p = data_to_model_transformation(obj, naive)
% suppose there's a linear function mapping the "true" model to
% the model we're using (scale and offset parameters) that are
% unknown. the data were taken at a different temperature,
% salt concentration, whatever, so the scaling is off. we
% don't have explicit access to the model state information,
% only the emission probability function. we would like to
% apply a scaling and offset to the model states, but we can
% just as easily apply the inverse to the data. then we'll
% know that in reality, we need to do the inverse scaling to
% the model.
% this does an L1 regression to fit the data to the model.
% get the max likelihood emission values for each model state
model_means = obj.get_model_values();
% is this run supposed to be from a naive starting state?
if nargin<2
naive = false;
end
% if we haven't aligned yet, make an inital guess about the
% model scaling and offset by fitting a line
if naive || isempty(obj.viterbi_alignment)
% randomly grab points from model and data
m = datasample(model_means',500);
d = datasample(obj.data,500);
% sort them in order, adding noise to the model
blur = range(obj.data)/size(obj.T,1)/2; % empirical estimate
m = sort(m + randn(size(m))*blur);
d = sort(d);
else
% we do have a viterbi alignment, so use that information
% to do a linear fit of model to data
m = model_means(unique(obj.viterbi_alignment.states))';
d = accumarray(obj.viterbi_alignment.states,obj.data,[],@median);
d = d(d~=0);
end
% fit a line and return the best fit params
%[scale, offset] = polyfit(m,d,1); % polynomial order 1, a line
try
p = robustfit(d,m); % better at ignoring outliers
p = fliplr(p'); % [scale, offset]
catch ex
p = [m\d, 0];
end
%offset = p(2);
%scale = p(1);
end
function d = scaled_data(obj, inds)
% return a scaled version of the data according to the object
% parameters in data_scaling
if nargin < 2
d = obj.data*obj.data_scaling(1) + obj.data_scaling(2);
else
d = obj.data(inds)*obj.data_scaling(1) + obj.data_scaling(2);
end
end
function model_means = get_model_values(obj)
% we don't know the model explicitly...
% approximate the model levels by doing a max over emissions
current = (min(obj.data)-100):(max(obj.data)+100); % currents
logEm = zeros(numel(current),size(obj.T,1)); % allocate space
for j = 1:numel(current) % for each current in the data range
logEm(j,:) = log10(obj.E(current(j))); % get emissions
end
[~,inds] = max(logEm,[],1); % max prob emission index
model_means = current(inds); % max prob emission current
end
function in = parse_inputs(obj, varargin)
% parse all inputs so all methods can use them easily
p = inputParser;
% defaults and checks
check_trans_matrix = @(x) all([isnumeric(x), ...
all(x>=0), ...
all(x<=1), ...
size(x,1)==size(x,2)]);
check_emission = @(x) all([isnumeric(feval(x,[1,2,3])), ...
all(feval(x,[1,2,3])>=0), ...
all(feval(x,[1,2,3])<=1)]);%, ...
%numel(feval(x,[1,2,3]))==3]);
check_init = @(x) all([isnumeric(x), ...
all(x>=0)]);
% set up the inputs
addOptional(p, 'data', [], @isnumeric); % data observed
addOptional(p, 'emission', [], check_emission); % emission prob function handle
addOptional(p, 'transition', [], check_trans_matrix); % transition prob matrix
addOptional(p, 'starting_prob', [], check_init); % starting probability array
% parse
parse(p,varargin{:});
in = p.Results;
% impose normalization on transition probabilities
% i.e. rows must sum to one
in.transition = obj.row_normalize(in.transition);
in.starting_prob = in.starting_prob / sum(in.starting_prob); % normalize
end
function n = row_normalize(~, m)
% normalize rows of matrix m, so they add to one
% return normalized matrix
n = m ./ repmat(sum(m,2), 1, size(m,1));
end
end
end