-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathget_model_levels_M2.m
74 lines (62 loc) · 2.83 KB
/
get_model_levels_M2.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
function [pA, pA_std, std_mean, scale, offset] = get_model_levels_M2(seq, measured_levels)
% get_model_levels_M2(seq, measured_levels) returns current levels for
% sequence 'seq' that are predicted by M2-MspA measurements by Laszlo et al
% and scaled to mesh with the empirical CDF of the levels passed in,
% 'measured_levels'.
% Stephen Fleming, 6/1/16
% Takes in a sequence in the form of 'ATCGTTCAAAGC...' with 'R'
% denoting abasic sites
% generate "states"
k = 5;
states = get_states(nt2int(seq), k); % k for k-mer
% get the current levels from my own M2 model
d = load('M2_model.mat');
pA = nan(size(states));
std_mean = ones(size(states));
% real parts are the parts without any abasics
pA(imag(states)==0) = d.M2_model.level_mean(states(imag(states)==0)); % convert into something like pA
pA_std(imag(states)==0) = d.M2_model.level_stdv(states(imag(states)==0));
std_mean(imag(states)==0) = d.M2_model.sd_mean(states(imag(states)==0));
% imaginary parts are the abasic parts (we have to search the model)
[~,iminds] = find(imag(states)~=0);
for i = iminds
strstate = seq(i:(i+k-1)); % get the string sequence for abasic kmer
abasicind = find(cellfun(@(x) strcmp(strstate,x), cellstr(d.M2_model_abasics.kmer')),1,'first');
pA(i) = d.M2_model_abasics.level_mean(abasicind); % find the matching string and get level info
pA_std(i) = d.M2_model_abasics.level_stdv(abasicind);
end
% % scale the current levels to match the scaling of the measured levels
%
% [cdf1,lev1] = ecdf(pA);
% [cdf2,lev2] = ecdf(measured_levels);
%
% function value = fun(m,b,cdf1,cdf2,lev1,lev2)
% value = sum((interp1(cdf2,lev2,linspace(0.05,0.95,10),'linear','extrap')'-(m.*interp1(cdf1,lev1,linspace(0.05,0.95,10),'linear','extrap')'+b)).^2);
% end
%
% params = fminsearch(@(p) fun(p(1),p(2),cdf1,cdf2,lev1,lev2), [std(lev1)/std(lev2), mean(lev1)-mean(lev2)]);
% scale = params(1);
% offset = params(2);
%
% pA = pA * scale + offset;
% pA_std = pA_std * scale;
% scale the current levels to match the scaling of the measured levels
xx = 0:400;
[y1,x] = hist(pA,0:5:400);
y1 = y1/sum(y1);
y1 = min(y1,0.03);
[y2,~] = hist(measured_levels,x);
y2 = y2/sum(y2);
y2 = min(y2,0.03);
y1 = interp1(x,y1,0:400);
y2 = interp1(x,y2,0:400);
function value = fun(m,b,y1,y2)
yy1 = interp1((0:400)*m+b,y1,0:400,'linear','extrap'); % scale the levels and reinterpolate
value = sum((y2-yy1).^2);
end
params = fminsearch(@(p) fun(p(1),p(2),y1,y2), [range(measured_levels)/range(pA), min(measured_levels)-min(pA)*range(measured_levels)/range(pA)]);
scale = params(1);
offset = params(2);
pA = pA * scale + offset;
pA_std = pA_std * scale;
end