function erasketc(M,N) curd = pwd; cd ../string_proc/combinatorics/ addpcurdir cd(curd) alf = 'ab'; global nn ee %nn = []; %ee = []; %for n=1:1000 % M=n*2; % N=n; mrat = 0; RR = []; while 1 %for t=1:10000 %b = round(rand); % indep? b = 1; x1 = get_datum(alf,M); x2 = get_datum(alf,M); if b y1 = get_datum(alf,N); y2 = get_datum(alf,N); else y1 = rand_erase(x1,N); y2 = rand_erase(x2,N); end C1 = count_subseq(x1,y1); C2 = count_subseq(x2,y2); %Klip = M*N^2; %Klip = 1; Klip = exp(M^(3/4)); %Klip = M^(sqrt(M)); %Klip = M^N; %Klip = log(M)^(M^(3/4)); if rho([x1 y1],[x2 y2])>0 Rlip = abs(C1-C2) / rho([x1 y1],[x2 y2]); %Rlip = Rlip/Klip; %Rlip = round(Rlip); %RR(end+1) = Rlip; RR(end+1) = (Rlip <= Klip); %hist(RR,100); %drawnow fprintf('E = %1.9f \n',mean(RR)); %fprintf('E = %d \n',round(mean(RR))); end %Klip = exp(logchoose(M,N)); % too big %Klip = M^5; % too small %Klip = (M/N)^N; % too small %Klip = (M/log(M))^(N); % too big %r = abs(C1-C2) / rho([x1 y1],[x2 y2]) / Klip; %mrat = max(r,mrat); %fprintf('mrat = %1.12f \n',mrat); end % nn(end+1) = n; % ee(end+1) = mean(RR) % plot(nn,ee); % drawnow %end return T=1e6; E0 = indepExp(M,N); ntr = M; %ntr = 1; acc = 0; k = 1; %k = N; %fp = 1; % false post (indep) %fn = 1; % false neg (indep) for t=1:T b = round(rand); % indep? X = 0; for i=1:ntr x = get_datum(alf,M); if b y = get_datum(alf,N); else y = rand_erase(x,N); end X = X + count_subseq(x,y); end p = (X/ntr< k*E0); %if p > b % fp = fp + 1; %elseif p < b % fn = fn + 1; %end %k = k * fn/fp acc = acc + (p==b); fprintf('trials = %d; accuracy = %1.5f \n',t,acc/t); end return maxr = 0; for m=1:M for n=1:m K = total_subsec_cnt(m,n); r = choose(m,n) * 2^(m-n) / K * (m+1); maxr = max(r,maxr); fprintf('m = %2d n = %2d rat = %1.12f mrat = %1.12f \n',m,n,r,maxr); end end return T = 1e10; %E = indepExp(M,N); E = eraseEmp(M,N); alf = 'ab'; p = (M-N)/M; nn = 0; global m n P1 P0 = eraseproc(M,N); P1 = P0*0; Cemp = P1; m = 2; n = M + N; for t=1:T x = get_datum(alf,M); %y = get_datum(alf,N); %y = rand_erase(x,p); y = rand_erase(x,N); %if veq(x,'abb') & veq(y,'ba') % keyboard %end xi = c2i([x y]-'a'); Cemp(xi) = Cemp(xi)+1; P1 = Cemp/sum(Cemp); tv = .5*totdif(P0,P1); nstr = count_subseq(x,y); nn = nn + nstr; fprintf('E = %1.8f A = %1.8f tv = %1.8f \n',E,nn/t,tv); end return function E = indepExp(M,N) E = exp(logchoose(M,N) + N*log(1/2)); return function E = eraseEmp(M,N) global m n m = 2; n = M + N; E = 0; Csum = 0; for xi = 1:m^n xy = i2c(xi); x = xy(1:M); y = xy(M+1:M+N); Csum = Csum + count_subseq(x,y)^2; end E = (1/2)^M * Csum / choose(M,N); return function E = eraseExp(M,N) p = 0; for f=0:M-length(y) %x = logchoose(M,N-f) - logchoose(M,N) + f*log(1/2); x = logchoose(M,N-f) + f*log(1/2); p = p + exp(x); end E = p; return function x = get_datum(alf,n) na = length(alf); x = alf(ceil(rand(1,n)*na)); return function y = rand_erase(x,N) ii = randperm(length(x)); ii = ii(1:N); ii = sort(ii); y = x(ii); return y = []; for i=1:length(x) if rand > p y(end+1) = x(i); end end return