function good = shattersample_crude(X) % can we shatter the sample X? Should be able to global alf nx = length(X); U = getU(X); lU = []; alf = unique(char(U))'; alf = alf(2:end); Mpos = null_dfsa(alf); %Mneg = null_dfsa(alf); nu = length(U); MU = {}; for i=1:nu ui = U{i}; lU(i) = length(ui); if isempty(ui) ui = '@'; end fprintf('processing U = %s \n',ui); MU{i} = bool2fsa_pt(['(' ui ')'],alf); end MU=MU(:); XXi = []; for i=1:length(X) xi = embedX(X{i},MU); XXi = [XXi; xi(:)']; end shufid = getshuffids(XXi,lU); tic PROGstart(2^nx) for i=1:2^nx Y = zeros(nx,1); ystr = dec2bin(i-1,nx); ii1 = find(ystr=='1'); Y(ii1) = ones(length(ii1),1); Y = 2*Y - 1; %alpha = Y ./ sum(XXi,2); %w = getw(X,MU,alpha); %w = getw(XXi,Y,lU); w = getw(XXi,Y,lU,shufid); y_hat = sign(XXi * w); good = all(Y==y_hat); if ~good keyboard end PROGupdate(i); %fprintf(' %1.9f done; seconds = %1.5f \n',i/2^N,toc); end PROGend; NODISPLAY = 0; return function h = eta(k) global alf if k==0 h = 1; else h = 1; for i=0:k-1 h = h + eta(i)*choose_appr(k,i); end end return function shufid = getshuffids(XXi,lU) [nx,nd] = size(XXi); shufid = zeros(nx,1); for i=1:nx try shufid(i) = longest_shuff(XXi(i,:), lU); catch '(1) shuf id not unique!!!' keyboard end % will cause an error if more than one end usi = unique(shufid); if length(usi) < length(shufid) '(2) shuf id not unique!!!' keyboard end return shufid = getshuffids(XXi,lU); tic PROGstart(2^nx) for i=1:2^nx Y = zeros(nx,1); ystr = dec2bin(i-1,nx); ii1 = find(ystr=='1'); Y(ii1) = ones(length(ii1),1); Y = 2*Y - 1; %alpha = Y ./ sum(XXi,2); %w = getw(X,MU,alpha); w = getw(XXi,Y,lU); y_hat = sign(XXi * w); good = all(Y==y_hat); if ~good keyboard end PROGupdate(i); %fprintf(' %1.9f done; seconds = %1.5f \n',i/2^N,toc); end PROGend; NODISPLAY = 0; return function h = eta(k) global alf if k==0 h = 1; else h = 1; for i=0:k-1 h = h + eta(i)*choose_appr(k,i); end end return function shufid = getshuffids(XXi,lU) [nx,nd] = size(XXi); shufid = zeros(nx,1); for i=1:nx try shufid(i) = longest_shuff(XXi(i,:), lU); catch '(1) shuf id not unique!!!' keyboard end % will cause an error if more than one end usi = unique(shufid); if length(usi) < length(shufid) '(2) shuf id not unique!!!' keyboard end return function w = getw(XXi,Y,lU,shufid) % takes a collection of "support vector" strings X % and their coefficients alpha % computes corresponding weights w % this is for the piecewise testable languages [nx,nd] = size(XXi); w = zeros(nd,1); for i=1:nx j = shufid(i); w(j) = Y(i) * eta(lU(j)); end return function w = getw0(XXi,Y,lU) % takes a collection of "support vector" strings X % and their coefficients alpha % computes corresponding weights w % this is for the piecewise testable languages [nx,nd] = size(XXi); shufid = zeros(nx,1); for i=1:nx try shufid(i) = longest_shuff(XXi(i,:), lU); catch '(1) shuf id not unique!!!' keyboard end % will cause an error if more than one end usi = unique(shufid); if length(usi) < length(shufid) '(2) shuf id not unique!!!' keyboard end % if all works out, shufid(i) is the unique shuffle element % that identifies the vector xi = XXi(i,:) w = zeros(nd,1); for i=1:nx j = shufid(i); w(j) = Y(i) * eta(lU(j)); end return function uii = longest_shuff(xi, lU) xi=xi(:); lU=lU(:); ii = find(xi); ll = lU(ii); ml = max(ll); if ml==0 uii = find(lU==0); else uii = find(lU.*xi==ml); end return function y_hat = compute_yhat_gram(G,xii,alpha,b) GX = G(:,xii); y_hat = sign(b + GX*alpha); return nx = length(xii); % for how many x's are we evaluating? y_hat = zeros(nx,1); for n=1:nx j = xii(n); Gj = G(:,j); s = b + alpha'*Gj; %s = b; %for i=1:length(alpha) % s = s + alpha(i) * G(i,j); %end y_hat(n) = sign(s); end return function U = getU(X) % takes a collection of "support vector" strings X % computes the explicit features (regular sets) U % this is for the piecewise testable languages U = {}; for i=1:length(X) x = X{i}; S = all_subseq(x); for j=1:length(S) s = S{j}; ind = strmatch(s,U,'exact'); if isempty(ind) U{end+1} = s; end end end [U,y] = sort(U); return function xi = embedX(x,MU) xi = zeros(size(MU)); for i=1:length(xi) xi(i) = run_dfsa(MU{i},x); end return function w = getw_old(X,MU,alpha) % takes a collection of "support vector" strings X % and their coefficients alpha % computes corresponding weights w % this is for the piecewise testable languages w = zeros(size(MU)); for i=1:length(X) x = X{i}; xi = embedX(x,MU); w = w + xi * alpha(i); end return function S = all_subseq(s) n = length(s); POWSET = list_all_ind(repmat([2],[n 1]))-1; S = repmat({s},size(POWSET,1),1); for j = 1:length(S) S{j} = s(find(POWSET(j,:))); end S = unique(S); return