function S = shufprod(x,y) ALIGNS = ed_align({x,y}); S = {}; for i=1:length(ALIGNS) A = ALIGNS{i}; s = ''; for j=1:size(A,2) c = A(:,j); c = strrep(c','-',''); if length(c)>1 c = c(1); end s(end+1) = c; end S{end+1} = s; end S = unique(S); return function ALIGNS = ed_align(U) % edit-distance align % hyperlatice algorithm global SWP_COST global DEL_COST global CYL_WIDTH global HyperLattice global PHI global ALIGNS SWP_COST = Inf; DEL_COST = 0; CYL_WIDTH = Inf; % only explore paths that don't deviate % from the main hyper-diagonal by CYL_WIDTH N = length(U); L=zeros(1,N); for i=1:N, L(i)=length(U{i}); end % the string lengths dims = L+1; D = dims / sqrt(dims * dims'); dd = repmat([2],[1,N]); twoN = 2^N; twoN0 = zeros(1,twoN-1); dims0 = zeros(1,N); binstr0 = repmat('0',size(dims)); HyperLattice = zeros(1,prod(dims)); PHI = cell(1,prod(dims)); % PHI is all the ways of getting to HyperLattice(i,j,..) %PROGstart(prod(dims),'Aligning...'); for indIJ = 2:prod(dims) ijk = ind2coord(indIJ,dims); x = ijk - (ijk * D') * D; dist = sqrt(x*x'); if (dist <= CYL_WIDTH) ycc = cost_coeff(ijk,dims); x = find(ijk~=1); % determining if a point lies on an edge if (length(x)==1) % we're on an edge eijk = ijk; eijk(x) = eijk(x) - 1; indIJ0 = coord2ind(eijk,dims); dijk = ijk - eijk; col = d2chars(ijk-1,dijk,U); cc = colcost(col,ijk,dims,ycc); HyperLattice(indIJ) = HyperLattice(indIJ0) + cc; % points on edges have the pleasant property that % there's only one way to get to them PHI{indIJ} = coord2ind(eijk,dims); else % we're not on an edge % look in all the "back" directions: % inds = 2:2^N; costs = Inf+twoN0; INDS = twoN0; for ind=1:twoN-1 %dijk = ind2coord(ind,dd)-1; % this is slow % alternative way: dijk = dims0; binstr1 = dec2bin(ind); binstr = binstr0; binstr(end-length(binstr1)+1:end)=binstr1; z = find(binstr=='1'); dijk(z)=1; eijk = ijk - dijk; if all(eijk) col = d2chars(ijk-1,dijk,U); cc = colcost(col,ijk,dims,ycc); indIJ1 = coord2ind(eijk,dims); oc = HyperLattice(indIJ1); costs(ind) = cc + oc; INDS(ind) = indIJ1; end end [x,y] = min(costs); Y = find(costs==x); HyperLattice(indIJ) = x; PHI{indIJ} = INDS(Y); end else HyperLattice(indIJ) = Inf; % we're in the forbidden zone end %PROGupdate(indIJ); end %PROGend; ALIGNS = {}; trace_tens('',prod(dims),dims,U); return function col = d2chars(ijk,dijk,U) % map the positions and offsets to a column of characters col = ''; for n=1:length(ijk) if dijk(n) col(end+1)=U{n}(ijk(n)); else col(end+1)='-'; end end return function y = cost_coeff(x,L) % the cost-coefficient of an operation % as a function of it position in the hyperlattice % and hyperlattice dimensions % the trivial "no-cost" option % use this to yield all possible alignments %y = zeros(size(x)); % the simplest is to set y = ones(size(x)); % this is heuristic that is natural for language % normalized quadratic % want all "warps" to have const. area = 1 %x = x-1; %a = 6./L.^3; %y = a.*x.*(L-x); return function cost = colcost(col,ijk,dims,ycc) global SWP_COST global DEL_COST % sum of all the pairwise char costs % idea: the cost of a column should depend on its % position in the hyper-lattice cost = 0; n = length(col); for i=1:n for j=i+1:n ci = col(i); cj = col(j); if (ci~=cj) isdel = (ci=='-') + (cj=='-'); %y1 = cost_coeff(ijk(i),dims(i)); %y2 = cost_coeff(ijk(j),dims(j)); %y = y1 + y2; y = ycc(i) + ycc(j); if isdel cost = cost + y * DEL_COST; else cost = cost + y * SWP_COST; end end end end return function trace_tens(align,indIJ,dims,U) % so that we don't waste memory passing them around global PHI global ALIGNS if (indIJ > 1) Y = PHI{indIJ}; ijk = ind2coord(indIJ,dims); for y=1:length(Y) indIJ1 = Y(y); eijk = ind2coord(indIJ1,dims); dijk = ijk - eijk; col = d2chars(ijk-1,dijk,U); col = col(:); align1 = [col,align]; trace_tens(align1,indIJ1,dims,U); end else ALIGNS{end+1}=align; end return