function M = gregexp2dfsa_fast(R,localf) global RCATALOG global MCATALOG global oldALF if ~exist('localf','var'), global ALF, localf = ALF; else, global ALF, ALF = localf; end R = strrep(R,' ',''); if ~isequal(ALF,oldALF) % problems will only arise if oldALF is a proper subset of ALF %if setcontains(ALF,oldALF) % the alphabet has changed -- clear memory RCATALOG = {}; MCATALOG = {}; oldALF=ALF; %end end if isempty(RCATALOG) RCATALOG = {}; MCATALOG = {}; end ind = strmatch(R,RCATALOG,'exact'); if ~isempty(ind) M = MCATALOG{ind}; else M = gregexp2dfsa_loc(R); end return function M = gregexp2dfsa_loc(reg) % very slightly modified version of standalone function global RCATALOG global MCATALOG global ALF nalf = ['@' ALF]; if all(ismember(reg,nalf)) %it's just a string M = str2dfsa(reg,ALF); else % too spoiled by matlab to write a regexp parsing routine! [chunk,rest] = peeloff(reg); if isempty(chunk) % we must have peeled off a () M = null_dfsa; else M = gregexp2dfsa_fast(chunk,ALF); end if ~isempty(rest) if (rest(1)=='*') M = kleene_nfsa(d2nfsa(M)); M = prune_eps(M); M = remove_unreachable(M); M = n2dfsa(M); rest = rest(2:end); end if ~isempty(rest) if (rest(1)=='+') rest = rest(2:end); M = union_dfsa(M,gregexp2dfsa_fast(rest,ALF)); elseif (rest(1)=='-') rest = rest(2:end); M = intersect_dfsa(M,negate_fsa(gregexp2dfsa_fast(rest,ALF))); elseif (rest(1)=='^') rest = rest(2:end); M = intersect_dfsa(M,gregexp2dfsa_fast(rest,ALF)); else M = concat_nfsa(d2nfsa(M),d2nfsa(gregexp2dfsa_fast(rest,ALF))); M = prune_eps(M); M = remove_unreachable(M); M = n2dfsa(M); end end end end % minimize M = minimize_dfsa(M); RCATALOG{end+1} = reg; MCATALOG{end+1} = M; return function M = gregexp2dfsa_loc_old(reg) % very slightly modified version of standalone function global RCATALOG global MCATALOG %if ~exist('localf','var'), global ALF, localf = ALF; else, global ALF, ALF = localf; end global ALF nalf = ['@' ALF]; %fprintf('%s \n',reg) if all(ismember(reg,nalf)) %it's just a string M = str2nfsa(reg,ALF); else % too spoiled by matlab to write a regexp parsing routine! [chunk,rest] = peeloff(reg); if isempty(chunk) % we must have peeled off a () M = null_nfsa; else M = gregexp2dfsa_fast(chunk,ALF); M = d2nfsa(M); end if ~isempty(rest) if (rest(1)=='*') M = kleene_nfsa(M); rest = rest(2:end); end if ~isempty(rest) if (rest(1)=='+') rest = rest(2:end); M = union_nfsa(M,d2nfsa(gregexp2dfsa_fast(rest,ALF))); elseif (rest(1)=='-') rest = rest(2:end); M = intersect_nfsa(M,d2nfsa(negate_fsa(gregexp2dfsa_fast(rest,ALF)))); elseif (rest(1)=='^') rest = rest(2:end); M = intersect_nfsa(M,d2nfsa(gregexp2dfsa_fast(rest,ALF))); else M = concat_nfsa(M,d2nfsa(gregexp2dfsa_fast(rest,ALF))); end end end end %remove unreachable states: M = prune_eps(M); M = remove_unreachable(M); % determinize M = n2dfsa(M); % minimize M = minimize_dfsa(M); RCATALOG{end+1} = reg; MCATALOG{end+1} = M; return function [chunk,rest]=peeloff(str) str = [str,'()']; x=find(str=='('); if (x(1)>1) % we can peel off a char chunk = str(1); rest = str(2:end); % keep that open paren else % it starts with a paren cnt=0; t=1; done=0; while ~done if (str(t)==')') cnt=cnt-1; elseif (str(t)=='(') cnt=cnt+1; end t=t+1; done = (~cnt+(t>length(str))); end if cnt error('unmatched parentheses in regexp'); end chunk = str(2:t-2); rest = str(t:end); end rest = rest(1:end-2); return