function M = gregexp2dfsa(reg,localf) % convert a GENERALIZED regular expression to a DFSA % see str2dfsa.m for basic dfsa definitions %note! these aren't quite the same as matlab's regexps! %here: % () is grouping % * is Kleene star % + is disjunction % - is negation (new) % ^ is conjunction (new) % @ is empty string % NOTE: % do not confuse () and @ -- they are different objects! % thus, L(@) = {@} = the language consisting of the single empty string % but L(()) = {} = the empty language if ~exist('localf','var'), global ALF, localf = ALF; else, global ALF, ALF = localf; end %global ALF nalf = ['@' ALF]; %fprintf('%s \n',reg) if all(ismember(reg,nalf)) %it's just a string %M = str2nfsa(reg,localf); M = str2dfsa(reg,localf); else % too spoiled by matlab to write a regexp parsing routine! [chunk,rest] = peeloff(reg); if isempty(chunk) % we must have peeled off a () %M = null_nfsa; M = null_dfsa; else M = gregexp2dfsa(chunk,localf); %M = d2nfsa(M); end if ~isempty(rest) if (rest(1)=='*') %M = kleene_nfsa(M); M = kleene_nfsa(d2nfsa(M)); M = prune_eps(M); M = remove_unreachable(M); %% determinize M = n2dfsa(M); rest = rest(2:end); end if ~isempty(rest) if (rest(1)=='+') rest = rest(2:end); %M = union_nfsa(M,d2nfsa(gregexp2dfsa(rest,localf))); M = union_dfsa(M,gregexp2dfsa(rest,localf)); elseif (rest(1)=='-') rest = rest(2:end); %M = intersect_nfsa(M,d2nfsa(negate_fsa(gregexp2dfsa(rest,localf)))); M = intersect_dfsa(M,negate_fsa(gregexp2dfsa(rest,localf))); elseif (rest(1)=='^') rest = rest(2:end); %M = intersect_nfsa(M,d2nfsa(gregexp2dfsa(rest,localf))); M = intersect_dfsa(M,gregexp2dfsa(rest,localf)); else %M = concat_nfsa(M,d2nfsa(gregexp2dfsa(rest,localf))); M = concat_nfsa(d2nfsa(M),d2nfsa(gregexp2dfsa(rest,localf))); %%remove unreachable states: M = prune_eps(M); M = remove_unreachable(M); %% determinize M = n2dfsa(M); end end end end %%remove unreachable states: %M = prune_eps(M); %M = remove_unreachable(M); %% determinize %M = n2dfsa(M); % minimize M = minimize_dfsa(M); %M2 = regexp2dfsa(reg,localf); %if ~equal_dfsa(M,M2) % keyboard %end %M=M2; return function [chunk,rest]=peeloff(str) str = [str,'()']; x=find(str=='('); if (x(1)>1) % we can peel off a char chunk = str(1); rest = str(2:end); % keep that open paren else % it starts with a paren cnt=0; t=1; done=0; while ~done if (str(t)==')') cnt=cnt-1; elseif (str(t)=='(') cnt=cnt+1; end t=t+1; done = (~cnt+(t>length(str))); end if cnt error('unmatched parentheses in regexp'); end chunk = str(2:t-2); rest = str(t:end); end rest = rest(1:end-2); return